/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if the OS has enabled AVX state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit 16: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
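    // (Descriptive note: execution resumes at the address recorded just above
    // once the signal handler has dealt with the fault; the vector registers
    // stored into cpuid_info below allow the upper YMM/ZMM bits to be
    // validated after detection completes.)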

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
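    // (Descriptive note: only non-Knights-family Intel CPUs reach the
    // instruction below; the checks above route everything else straight
    // to L_wrapup.)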
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // the uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
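  // For example, on a CPU that reports SSE3 but not SSE4.1, an explicit
  // -XX:UseSSE=4 is clamped to 3 below (with a warning), while -XX:UseSSE=2
  // is left as is.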
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }
  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
  }

  if (UseAVX < 2)
    _features &= ~CPU_AVX2;

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function. StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
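  // For example, with -XX:UseAVX=0 the intrinsic stays disabled below, and a
  // warning is printed only if UseChaCha20Intrinsics was requested explicitly
  // on the command line.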
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (!supports_rtm() && UseRTMLocking) {
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (!CompilerConfig::is_c2_enabled()) {
      // Only C2 does RTM locking optimization.
      vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
    }
    if (is_intel_family_core()) {
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
        // currently a collision between SKL and HSW_E3
        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
                                        "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
  } else { // !UseRTMLocking
    if (UseRTMForStackLocks) {
      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
      }
      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
    }
    if (UseRTMDeopt) {
      FLAG_SET_DEFAULT(UseRTMDeopt, false);
    }
    if (PrintPreciseRTMLockingStatistics) {
      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
    }
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On new cpus instructions which update whole XMM register should be used
  // to prevent partial register stall due to dependencies on high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).


  if (is_zx()) { // ZX cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on ZX cpus
    }
    if ((cpu_family() == 6) || (cpu_family() == 7)) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all ZX cpus
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2
      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
      }
      if (supports_sse4_2()) { // new ZX cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }

    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
  }

  if (is_amd_family()) { // AMD cpus specific settings
    if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
      // Use it on new AMD cpus starting from Opteron.
      UseAddressNop = true;
    }
    if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
      // Use it on new AMD cpus starting from Opteron.
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vector size to 16 bytes on AMD cpus < 17h.
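      // (Pre-Zen AMD cores handle 256-bit AVX operations noticeably less
      // efficiently, so vectors wider than 16 bytes generally do not pay
      // off there.)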
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus do the next optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
      }
    }
    if (is_atom_family() || is_knights_family()) {
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(OptoScheduling)) {
        OptoScheduling = true;
      }
#endif
      if (supports_sse4_2()) { // Silvermont
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (FLAG_IS_DEFAULT(UseIncDec)) {
        FLAG_SET_DEFAULT(UseIncDec, false);
      }
    }
    if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
    }
#ifdef COMPILER2
    if (UseAVX > 2) {
      if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
          (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
           ArrayOperationPartialInlineSize != 0 &&
           ArrayOperationPartialInlineSize != 16 &&
           ArrayOperationPartialInlineSize != 32 &&
           ArrayOperationPartialInlineSize != 64)) {
        int inline_size = 0;
        if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
          inline_size = 64;
        } else if (MaxVectorSize >= 32) {
          inline_size = 32;
        } else if (MaxVectorSize >= 16) {
          inline_size = 16;
        }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
        }
        ArrayOperationPartialInlineSize = inline_size;
      }

      if (ArrayOperationPartialInlineSize > MaxVectorSize) {
        ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
        if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
        } else {
          warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
        }
      }
    }
#endif
  }

#ifdef COMPILER2
  if (FLAG_IS_DEFAULT(OptimizeFill)) {
    if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
      OptimizeFill = false;
    }
  }
#endif

#ifdef _LP64
  if (UseSSE42Intrinsics) {
    if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      UseVectorizedMismatchIntrinsic = true;
    }
  } else if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
      warning("vectorizedMismatch intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseAVX >= 2) {
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
  } else if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
      warning("vectorizedHashCode intrinsics are not available on this CPU");
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#else
  if (UseVectorizedMismatchIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
      warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
  }
  if (UseVectorizedHashCodeIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
      warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
  }
#endif // _LP64

  // Use count leading zeros instruction if available.
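  // (With lzcnt, numberOfLeadingZeros compiles down to a single instruction;
  // without it the JIT falls back to a bsr-based sequence.)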
1716 if (supports_lzcnt()) { 1717 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1718 UseCountLeadingZerosInstruction = true; 1719 } 1720 } else if (UseCountLeadingZerosInstruction) { 1721 warning("lzcnt instruction is not available on this CPU"); 1722 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1723 } 1724 1725 // Use count trailing zeros instruction if available. 1726 if (supports_bmi1()) { 1727 // tzcnt does not require VEX prefix 1728 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1729 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1730 // Don't use tzcnt if BMI1 is switched off on the command line. 1731 UseCountTrailingZerosInstruction = false; 1732 } else { 1733 UseCountTrailingZerosInstruction = true; 1734 } 1735 } 1736 } else if (UseCountTrailingZerosInstruction) { 1737 warning("tzcnt instruction is not available on this CPU"); 1738 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1739 } 1740 1741 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1742 // The VEX prefix is generated only when AVX > 0. 1743 if (supports_bmi1() && supports_avx()) { 1744 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1745 UseBMI1Instructions = true; 1746 } 1747 } else if (UseBMI1Instructions) { 1748 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1749 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1750 } 1751 1752 if (supports_bmi2() && supports_avx()) { 1753 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1754 UseBMI2Instructions = true; 1755 } 1756 } else if (UseBMI2Instructions) { 1757 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1758 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1759 } 1760 1761 // Use population count instruction if available. 1762 if (supports_popcnt()) { 1763 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1764 UsePopCountInstruction = true; 1765 } 1766 } else if (UsePopCountInstruction) { 1767 warning("POPCNT instruction is not available on this CPU"); 1768 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1769 } 1770 1771 // Use fast-string operations if available. 1772 if (supports_erms()) { 1773 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1774 UseFastStosb = true; 1775 } 1776 } else if (UseFastStosb) { 1777 warning("fast-string operations are not available on this CPU"); 1778 FLAG_SET_DEFAULT(UseFastStosb, false); 1779 } 1780 1781 // For AMD processors use XMM/YMM MOVDQU instructions 1782 // for Object Initialization by default 1783 if (is_amd() && cpu_family() >= 0x19) { 1784 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1785 UseFastStosb = false; 1786 } 1787 } 1788 1789 #ifdef COMPILER2 1790 if (is_intel() && MaxVectorSize > 16) { 1791 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1792 UseFastStosb = false; 1793 } 1794 } 1795 #endif 1796 1797 // Use XMM/YMM MOVDQU instruction for Object Initialization 1798 if (UseSSE >= 2 && UseUnalignedLoadStores) { 1799 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1800 UseXMMForObjInit = true; 1801 } 1802 } else if (UseXMMForObjInit) { 1803 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1804 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1805 } 1806 1807 #ifdef COMPILER2 1808 if (FLAG_IS_DEFAULT(AlignVector)) { 1809 // Modern processors allow misaligned memory operations for vectors.
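// Illustrative interplay (assumed, not spelled out here): with
// UseUnalignedLoadStores == true, C2 may emit unaligned vector accesses
// such as movdqu at arbitrary addresses, so vectors need no extra
// alignment and AlignVector becomes false; with it false, only aligned
// forms (e.g. movdqa) are assumed safe, so AlignVector stays true and
// loops get pre-aligned.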
1810 AlignVector = !UseUnalignedLoadStores; 1811 } 1812 #endif // COMPILER2 1813 1814 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1815 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1816 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1817 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1818 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1819 } 1820 } 1821 1822 // Allocation prefetch settings 1823 int cache_line_size = checked_cast<int>(prefetch_data_size()); 1824 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1825 (cache_line_size > AllocatePrefetchStepSize)) { 1826 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1827 } 1828 1829 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1830 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1831 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1832 warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag."); 1833 } 1834 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1835 } 1836 1837 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1838 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1839 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1840 } 1841 1842 if (is_intel() && cpu_family() == 6 && supports_sse3()) { 1843 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1844 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1845 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1846 } 1847 #ifdef COMPILER2 1848 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1849 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1850 } 1851 #endif 1852 } 1853 1854 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1855 #ifdef COMPILER2 1856 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1857 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1858 } 1859 #endif 1860 } 1861 1862 #ifdef _LP64 1863 // Prefetch settings 1864 1865 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1866 // 50-warehouse specjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap. 1867 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1868 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1869 1870 // gc copy/scan is disabled if prefetchw isn't supported, because 1871 // Prefetch::write emits an inlined prefetchw on Linux. 1872 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1873 // The prefetcht0 instruction that is used instead works on both amd64 and em64t.
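// Worked example for the defaults below, assuming the common 64-byte
// dcache line: 576 bytes / 64 bytes per line == 9 lines, matching the
// "9 dcache lines" note above, i.e. a prefetch issued while copying or
// scanning at address p covers p + 576.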
1874 1875 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1876 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1877 } 1878 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1879 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1880 } 1881 #endif 1882 1883 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1884 (cache_line_size > ContendedPaddingWidth)) 1885 ContendedPaddingWidth = cache_line_size; 1886 1887 // This machine allows unaligned memory accesses 1888 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1889 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1890 } 1891 1892 #ifndef PRODUCT 1893 if (log_is_enabled(Info, os, cpu)) { 1894 LogStream ls(Log(os, cpu)::info()); 1895 outputStream* log = &ls; 1896 log->print_cr("Logical CPUs per core: %u", 1897 logical_processors_per_package()); 1898 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1899 log->print("UseSSE=%d", UseSSE); 1900 if (UseAVX > 0) { 1901 log->print(" UseAVX=%d", UseAVX); 1902 } 1903 if (UseAES) { 1904 log->print(" UseAES=1"); 1905 } 1906 #ifdef COMPILER2 1907 if (MaxVectorSize > 0) { 1908 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1909 } 1910 #endif 1911 log->cr(); 1912 log->print("Allocation"); 1913 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { 1914 log->print_cr(": no prefetching"); 1915 } else { 1916 log->print(" prefetching: "); 1917 if (UseSSE == 0 && supports_3dnow_prefetch()) { 1918 log->print("PREFETCHW"); 1919 } else if (UseSSE >= 1) { 1920 if (AllocatePrefetchInstr == 0) { 1921 log->print("PREFETCHNTA"); 1922 } else if (AllocatePrefetchInstr == 1) { 1923 log->print("PREFETCHT0"); 1924 } else if (AllocatePrefetchInstr == 2) { 1925 log->print("PREFETCHT2"); 1926 } else if (AllocatePrefetchInstr == 3) { 1927 log->print("PREFETCHW"); 1928 } 1929 } 1930 if (AllocatePrefetchLines > 1) { 1931 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize); 1932 } else { 1933 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize); 1934 } 1935 } 1936 1937 if (PrefetchCopyIntervalInBytes > 0) { 1938 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1939 } 1940 if (PrefetchScanIntervalInBytes > 0) { 1941 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1942 } 1943 if (ContendedPaddingWidth > 0) { 1944 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1945 } 1946 } 1947 #endif // !PRODUCT 1948 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1949 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1950 } 1951 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1952 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1953 } 1954 } 1955 1956 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1957 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1958 if (vrt == XenHVM) { 1959 st->print_cr("Xen hardware-assisted virtualization detected"); 1960 } else if (vrt == KVM) { 1961 st->print_cr("KVM virtualization detected"); 1962 } else if (vrt == VMWare) { 1963 st->print_cr("VMWare virtualization detected"); 1964 VirtualizationSupport::print_virtualization_info(st); 1965 } else if (vrt == HyperV) { 1966 st->print_cr("Hyper-V virtualization detected"); 1967 } else if (vrt == HyperVRole) { 1968 st->print_cr("Hyper-V role detected"); 1969 } 1970 } 1971 1972 bool VM_Version::compute_has_intel_jcc_erratum() { 1973 if (!is_intel_family_core()) { 1974 
// Only Intel CPUs are affected. 1975 return false; 1976 } 1977 // The following table of affected CPUs is based on the following document released by Intel: 1978 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 1979 switch (_model) { 1980 case 0x8E: 1981 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1982 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 1983 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 1984 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 1985 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 1986 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1987 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1988 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 1989 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1990 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 1991 case 0x4E: 1992 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 1993 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 1994 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 1995 return _stepping == 0x3; 1996 case 0x55: 1997 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1998 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1999 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 2000 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 2001 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 2002 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 2003 return _stepping == 0x4 || _stepping == 0x7; 2004 case 0x5E: 2005 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 2006 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 2007 return _stepping == 0x3; 2008 case 0x9E: 2009 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2010 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2011 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2012 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2013 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2014 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based 
// on microarchitecture code name Coffee Lake H 2015 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 2016 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP 2017 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 2018 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 2019 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 2020 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 2021 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2) 2022 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 2023 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 2024 case 0xA5: 2025 // Not in Intel documentation. 2026 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 2027 return true; 2028 case 0xA6: 2029 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 2030 return _stepping == 0x0; 2031 case 0xAE: 2032 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2033 return _stepping == 0xA; 2034 default: 2035 // If we are running on another Intel machine not recognized in the table, we are okay. 2036 return false; 2037 } 2038 } 2039 2040 // On Xen, the cpuid instruction returns 2041 // eax / registers[0]: Version of Xen 2042 // ebx / registers[1]: chars 'XenV' 2043 // ecx / registers[2]: chars 'MMXe' 2044 // edx / registers[3]: chars 'nVMM' 2045 // 2046 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2047 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2048 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2049 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2050 // 2051 // more information: 2052 // https://kb.vmware.com/s/article/1009458 2053 // 2054 void VM_Version::check_virtualizations() { 2055 uint32_t registers[4] = {0}; 2056 char signature[13] = {0}; 2057 2058 // Xen cpuid leaves can be found at 0x100-aligned boundaries starting 2059 // from 0x40000000 up to 0x40010000. 2060 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2061 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2062 detect_virt_stub(leaf, registers); 2063 memcpy(signature, &registers[1], 12); 2064 2065 if (strncmp("VMwareVMware", signature, 12) == 0) { 2066 Abstract_VM_Version::_detected_virtualization = VMWare; 2067 // check for extended metrics from guestlib 2068 VirtualizationSupport::initialize(); 2069 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2070 Abstract_VM_Version::_detected_virtualization = HyperV; 2071 #ifdef _WINDOWS 2072 // CPUID leaf 0x40000007 is available to the root partition only. 2073 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2074 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2075 detect_virt_stub(0x40000007, registers); 2076 if ((registers[0] != 0x0) || 2077 (registers[1] != 0x0) || 2078 (registers[2] != 0x0) || 2079 (registers[3] != 0x0)) { 2080 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2081 } 2082 #endif 2083 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2084 Abstract_VM_Version::_detected_virtualization = KVM; 2085 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2086 Abstract_VM_Version::_detected_virtualization = XenHVM; 2087 } 2088 } 2089 } 2090 2091 #ifdef COMPILER2 2092 // Determine if it's running on Cascade Lake using default options. 2093 bool VM_Version::is_default_intel_cascade_lake() { 2094 return FLAG_IS_DEFAULT(UseAVX) && 2095 FLAG_IS_DEFAULT(MaxVectorSize) && 2096 UseAVX > 2 && 2097 is_intel_cascade_lake(); 2098 } 2099 #endif 2100 2101 bool VM_Version::is_intel_cascade_lake() { 2102 return is_intel_skylake() && _stepping >= 5; 2103 } 2104 2105 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2106 // for implementing the array copy and clear operations. 2107 // Intel platforms that support the serialize instruction 2108 // have an improved implementation of 64-byte load/stores, so the default 2109 // threshold is set to 0 for these platforms. 2110 int VM_Version::avx3_threshold() { 2111 return (is_intel_family_core() && 2112 supports_serialize() && 2113 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2114 } 2115 2116 static bool _vm_version_initialized = false; 2117 2118 void VM_Version::initialize() { 2119 ResourceMark rm; 2120 // Making this stub must be the FIRST use of the assembler. 2121 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2122 if (stub_blob == nullptr) { 2123 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2124 } 2125 CodeBuffer c(stub_blob); 2126 VM_Version_StubGenerator g(&c); 2127 2128 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2129 g.generate_get_cpu_info()); 2130 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2131 g.generate_detect_virt()); 2132 2133 get_processor_features(); 2134 2135 LP64_ONLY(Assembler::precompute_instructions();) 2136 2137 if (VM_Version::supports_hv()) { // Supports hypervisor 2138 check_virtualizations(); 2139 } 2140 _vm_version_initialized = true; 2141 } 2142 2143 typedef enum { 2144 CPU_FAMILY_8086_8088 = 0, 2145 CPU_FAMILY_INTEL_286 = 2, 2146 CPU_FAMILY_INTEL_386 = 3, 2147 CPU_FAMILY_INTEL_486 = 4, 2148 CPU_FAMILY_PENTIUM = 5, 2149 CPU_FAMILY_PENTIUMPRO = 6, // Same family, several models 2150 CPU_FAMILY_PENTIUM_4 = 0xF 2151 } FamilyFlag; 2152 2153 typedef enum { 2154 RDTSCP_FLAG = 0x08000000, // bit 27 2155 INTEL64_FLAG = 0x20000000 // bit 29 2156 } _featureExtendedEdxFlag; 2157 2158 typedef enum { 2159 FPU_FLAG = 0x00000001, 2160 VME_FLAG = 0x00000002, 2161 DE_FLAG = 0x00000004, 2162 PSE_FLAG = 0x00000008, 2163 TSC_FLAG = 0x00000010, 2164 MSR_FLAG = 0x00000020, 2165 PAE_FLAG = 0x00000040, 2166 MCE_FLAG = 0x00000080, 2167 CX8_FLAG = 0x00000100, 2168 APIC_FLAG = 0x00000200, 2169 SEP_FLAG = 0x00000800, 2170 MTRR_FLAG = 0x00001000, 2171 PGE_FLAG = 0x00002000, 2172 MCA_FLAG = 0x00004000, 2173 CMOV_FLAG = 0x00008000, 2174 PAT_FLAG = 0x00010000, 2175 PSE36_FLAG = 0x00020000, 2176 PSNUM_FLAG = 0x00040000, 2177 CLFLUSH_FLAG = 0x00080000, 2178 DTS_FLAG = 0x00200000, 2179 ACPI_FLAG = 0x00400000, 2180 MMX_FLAG = 0x00800000, 2181
FXSR_FLAG = 0x01000000, 2182 SSE_FLAG = 0x02000000, 2183 SSE2_FLAG = 0x04000000, 2184 SS_FLAG = 0x08000000, 2185 HTT_FLAG = 0x10000000, 2186 TM_FLAG = 0x20000000 2187 } FeatureEdxFlag; 2188 2189 static BufferBlob* cpuid_brand_string_stub_blob; 2190 static const int cpuid_brand_string_stub_size = 550; 2191 2192 extern "C" { 2193 typedef void (*getCPUIDBrandString_stub_t)(void*); 2194 } 2195 2196 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr; 2197 2198 // VM_Version statics 2199 enum { 2200 ExtendedFamilyIdLength_INTEL = 16, 2201 ExtendedFamilyIdLength_AMD = 24 2202 }; 2203 2204 const size_t VENDOR_LENGTH = 13; 2205 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2206 static char* _cpu_brand_string = nullptr; 2207 static int64_t _max_qualified_cpu_frequency = 0; 2208 2209 static int _no_of_threads = 0; 2210 static int _no_of_cores = 0; 2211 2212 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2213 "8086/8088", 2214 "", 2215 "286", 2216 "386", 2217 "486", 2218 "Pentium", 2219 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2220 "", 2221 "", 2222 "", 2223 "", 2224 "", 2225 "", 2226 "", 2227 "", 2228 "Pentium 4" 2229 }; 2230 2231 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2232 "", 2233 "", 2234 "", 2235 "", 2236 "5x86", 2237 "K5/K6", 2238 "Athlon/AthlonXP", 2239 "", 2240 "", 2241 "", 2242 "", 2243 "", 2244 "", 2245 "", 2246 "", 2247 "Opteron/Athlon64", 2248 "Opteron QC/Phenom", // Barcelona et.al. 2249 "", 2250 "", 2251 "", 2252 "", 2253 "", 2254 "", 2255 "Zen" 2256 }; 2257 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2258 // September 2013, Vol 3C Table 35-1 2259 const char* const _model_id_pentium_pro[] = { 2260 "", 2261 "Pentium Pro", 2262 "", 2263 "Pentium II model 3", 2264 "", 2265 "Pentium II model 5/Xeon/Celeron", 2266 "Celeron", 2267 "Pentium III/Pentium III Xeon", 2268 "Pentium III/Pentium III Xeon", 2269 "Pentium M model 9", // Yonah 2270 "Pentium III, model A", 2271 "Pentium III, model B", 2272 "", 2273 "Pentium M model D", // Dothan 2274 "", 2275 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2276 "", 2277 "", 2278 "", 2279 "", 2280 "", 2281 "", 2282 "Celeron", // 0x16 Celeron 65nm 2283 "Core 2", // 0x17 Penryn / Harpertown 2284 "", 2285 "", 2286 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2287 "Atom", // 0x1B Z5xx series Silverthorn 2288 "", 2289 "Core 2", // 0x1D Dunnington (6-core) 2290 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2291 "", 2292 "", 2293 "", 2294 "", 2295 "", 2296 "", 2297 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2298 "", 2299 "", 2300 "", // 0x28 2301 "", 2302 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2303 "", 2304 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2305 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2306 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2307 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2308 "", 2309 "", 2310 "", 2311 "", 2312 "", 2313 "", 2314 "", 2315 "", 2316 "", 2317 "", 2318 "Ivy Bridge", // 0x3a 2319 "", 2320 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2321 "", // 0x3d "Next Generation Intel Core Processor" 2322 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2323 "", // 0x3f "Future Generation Intel Xeon Processor" 2324 "", 2325 "", 2326 "", 2327 "", 2328 "", 2329 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2330 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2331 nullptr 2332 }; 2333 2334 /* Brand ID is for back compatibility 2335 * 
Newer CPUs use the extended brand string */ 2336 const char* const _brand_id[] = { 2337 "", 2338 "Celeron processor", 2339 "Pentium III processor", 2340 "Intel Pentium III Xeon processor", 2341 "", 2342 "", 2343 "", 2344 "", 2345 "Intel Pentium 4 processor", 2346 nullptr 2347 }; 2348 2349 2350 const char* const _feature_edx_id[] = { 2351 "On-Chip FPU", 2352 "Virtual Mode Extensions", 2353 "Debugging Extensions", 2354 "Page Size Extensions", 2355 "Time Stamp Counter", 2356 "Model Specific Registers", 2357 "Physical Address Extension", 2358 "Machine Check Exceptions", 2359 "CMPXCHG8B Instruction", 2360 "On-Chip APIC", 2361 "", 2362 "Fast System Call", 2363 "Memory Type Range Registers", 2364 "Page Global Enable", 2365 "Machine Check Architecture", 2366 "Conditional Mov Instruction", 2367 "Page Attribute Table", 2368 "36-bit Page Size Extension", 2369 "Processor Serial Number", 2370 "CLFLUSH Instruction", 2371 "", 2372 "Debug Trace Store feature", 2373 "ACPI registers in MSR space", 2374 "Intel Architecture MMX Technology", 2375 "Fast Floating Point Save and Restore", 2376 "Streaming SIMD Extensions", 2377 "Streaming SIMD Extensions 2", 2378 "Self-Snoop", 2379 "Hyper Threading", 2380 "Thermal Monitor", 2381 "", 2382 "Pending Break Enable" 2383 }; 2384 2385 const char* const _feature_extended_edx_id[] = { 2386 "", 2387 "", 2388 "", 2389 "", 2390 "", 2391 "", 2392 "", 2393 "", 2394 "", 2395 "", 2396 "", 2397 "SYSCALL/SYSRET", 2398 "", 2399 "", 2400 "", 2401 "", 2402 "", 2403 "", 2404 "", 2405 "", 2406 "Execute Disable Bit", 2407 "", 2408 "", 2409 "", 2410 "", 2411 "", 2412 "", 2413 "RDTSCP", 2414 "", 2415 "Intel 64 Architecture", 2416 "", 2417 "" 2418 }; 2419 2420 const char* const _feature_ecx_id[] = { 2421 "Streaming SIMD Extensions 3", 2422 "PCLMULQDQ", 2423 "64-bit DS Area", 2424 "MONITOR/MWAIT instructions", 2425 "CPL Qualified Debug Store", 2426 "Virtual Machine Extensions", 2427 "Safer Mode Extensions", 2428 "Enhanced Intel SpeedStep technology", 2429 "Thermal Monitor 2", 2430 "Supplemental Streaming SIMD Extensions 3", 2431 "L1 Context ID", 2432 "", 2433 "Fused Multiply-Add", 2434 "CMPXCHG16B", 2435 "xTPR Update Control", 2436 "Perfmon and Debug Capability", 2437 "", 2438 "Process-context identifiers", 2439 "Direct Cache Access", 2440 "Streaming SIMD Extensions 4.1", 2441 "Streaming SIMD Extensions 4.2", 2442 "x2APIC", 2443 "MOVBE", 2444 "Popcount instruction", 2445 "TSC-Deadline", 2446 "AESNI", 2447 "XSAVE", 2448 "OSXSAVE", 2449 "AVX", 2450 "F16C", 2451 "RDRAND", 2452 "" 2453 }; 2454 2455 const char* const _feature_extended_ecx_id[] = { 2456 "LAHF/SAHF instruction support", 2457 "Core multi-processor legacy mode", 2458 "", 2459 "", 2460 "", 2461 "Advanced Bit Manipulations: LZCNT", 2462 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2463 "Misaligned SSE mode", 2464 "", 2465 "", 2466 "", 2467 "", 2468 "", 2469 "", 2470 "", 2471 "", 2472 "", 2473 "", 2474 "", 2475 "", 2476 "", 2477 "", 2478 "", 2479 "", 2480 "", 2481 "", 2482 "", 2483 "", 2484 "", 2485 "", 2486 "", 2487 "" 2488 }; 2489 2490 void VM_Version::initialize_tsc(void) { 2491 ResourceMark rm; 2492 2493 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2494 if (cpuid_brand_string_stub_blob == nullptr) { 2495 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2496 } 2497 CodeBuffer c(cpuid_brand_string_stub_blob); 2498 VM_Version_StubGenerator g(&c); 2499 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2500
g.generate_getCPUIDBrandString()); 2501 } 2502 2503 const char* VM_Version::cpu_model_description(void) { 2504 uint32_t cpu_family = extended_cpu_family(); 2505 uint32_t cpu_model = extended_cpu_model(); 2506 const char* model = nullptr; 2507 2508 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2509 for (uint32_t i = 0; i <= cpu_model; i++) { 2510 model = _model_id_pentium_pro[i]; 2511 if (model == nullptr) { 2512 break; 2513 } 2514 } 2515 } 2516 return model; 2517 } 2518 2519 const char* VM_Version::cpu_brand_string(void) { 2520 if (_cpu_brand_string == nullptr) { 2521 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2522 if (nullptr == _cpu_brand_string) { 2523 return nullptr; 2524 } 2525 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2526 if (ret_val != OS_OK) { 2527 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2528 _cpu_brand_string = nullptr; 2529 } 2530 } 2531 return _cpu_brand_string; 2532 } 2533 2534 const char* VM_Version::cpu_brand(void) { 2535 const char* brand = nullptr; 2536 2537 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2538 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2539 brand = _brand_id[0]; 2540 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) { 2541 brand = _brand_id[i]; 2542 } 2543 } 2544 return brand; 2545 } 2546 2547 bool VM_Version::cpu_is_em64t(void) { 2548 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2549 } 2550 2551 bool VM_Version::is_netburst(void) { 2552 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2553 } 2554 2555 bool VM_Version::supports_tscinv_ext(void) { 2556 if (!supports_tscinv_bit()) { 2557 return false; 2558 } 2559 2560 if (is_intel()) { 2561 return true; 2562 } 2563 2564 if (is_amd()) { 2565 return !is_amd_Barcelona(); 2566 } 2567 2568 if (is_hygon()) { 2569 return true; 2570 } 2571 2572 return false; 2573 } 2574 2575 void VM_Version::resolve_cpu_information_details(void) { 2576 2577 // In the future we want to base this information on proper cpu 2578 // and cache topology enumeration, such as 2579 // Intel 64 Architecture Processor Topology Enumeration, 2580 // which supports system cpu and cache topology enumeration 2581 // using either x2APIC IDs or initial APIC IDs. 2582 2583 // Currently these are only rough cpu information estimates 2584 // which will not necessarily reflect the exact configuration of the system. 2585 2586 // this is the number of logical hardware threads 2587 // visible to the operating system 2588 _no_of_threads = os::processor_count(); 2589 2590 // find out the number of threads per cpu package 2591 int threads_per_package = threads_per_core() * cores_per_cpu(); 2592 2593 // use the number of threads visible to the process to estimate the number of sockets 2594 _no_of_sockets = _no_of_threads / threads_per_package; 2595 2596 // The process might only see a subset of the total number of threads 2597 // from a single processor package, e.g. under virtualization or resource management. 2598 // If so, just report 1 as the number of packages.
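// Illustrative numbers (hypothetical machine): 2 sockets x 8 cores x
// 2 SMT threads gives os::processor_count() == 32 and
// threads_per_package == 16, so 32 / 16 == 2 sockets; a process
// restricted to only 4 of those threads would compute 4 / 16 == 0,
// which the check below rounds up to 1.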
2599 if (0 == _no_of_sockets) { 2600 _no_of_sockets = 1; 2601 } 2602 2603 // estimate the number of cores 2604 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2605 } 2606 2607 2608 const char* VM_Version::cpu_family_description(void) { 2609 int cpu_family_id = extended_cpu_family(); 2610 if (is_amd()) { 2611 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2612 return _family_id_amd[cpu_family_id]; 2613 } 2614 } 2615 if (is_intel()) { 2616 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2617 return cpu_model_description(); 2618 } 2619 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2620 return _family_id_intel[cpu_family_id]; 2621 } 2622 } 2623 if (is_hygon()) { 2624 return "Dhyana"; 2625 } 2626 return "Unknown x86"; 2627 } 2628 2629 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2630 assert(buf != nullptr, "buffer is null!"); 2631 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2632 2633 const char* cpu_type = nullptr; 2634 const char* x64 = nullptr; 2635 2636 if (is_intel()) { 2637 cpu_type = "Intel"; 2638 x64 = cpu_is_em64t() ? " Intel64" : ""; 2639 } else if (is_amd()) { 2640 cpu_type = "AMD"; 2641 x64 = cpu_is_em64t() ? " AMD64" : ""; 2642 } else if (is_hygon()) { 2643 cpu_type = "Hygon"; 2644 x64 = cpu_is_em64t() ? " AMD64" : ""; 2645 } else { 2646 cpu_type = "Unknown x86"; 2647 x64 = cpu_is_em64t() ? " x86_64" : ""; 2648 } 2649 2650 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2651 cpu_type, 2652 cpu_family_description(), 2653 supports_ht() ? " (HT)" : "", 2654 supports_sse3() ? " SSE3" : "", 2655 supports_ssse3() ? " SSSE3" : "", 2656 supports_sse4_1() ? " SSE4.1" : "", 2657 supports_sse4_2() ? " SSE4.2" : "", 2658 supports_sse4a() ? " SSE4A" : "", 2659 is_netburst() ? " Netburst" : "", 2660 is_intel_family_core() ? 
" Core" : "", 2661 x64); 2662 2663 return OS_OK; 2664 } 2665 2666 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2667 assert(buf != nullptr, "buffer is null!"); 2668 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2669 assert(getCPUIDBrandString_stub != nullptr, "not initialized"); 2670 2671 // invoke newly generated asm code to fetch CPU Brand String 2672 getCPUIDBrandString_stub(&_cpuid_info); 2673 2674 // fetch results into buffer 2675 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2676 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2677 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2678 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2679 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2680 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2681 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2682 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2683 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2684 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2685 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2686 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2687 2688 return OS_OK; 2689 } 2690 2691 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2692 guarantee(buf != nullptr, "buffer is null!"); 2693 guarantee(buf_len > 0, "buffer len not enough!"); 2694 2695 unsigned int flag = 0; 2696 unsigned int fi = 0; 2697 size_t written = 0; 2698 const char* prefix = ""; 2699 2700 #define WRITE_TO_BUF(string) \ 2701 { \ 2702 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2703 if (res < 0) { \ 2704 return buf_len - 1; \ 2705 } \ 2706 written += res; \ 2707 if (prefix[0] == '\0') { \ 2708 prefix = ", "; \ 2709 } \ 2710 } 2711 2712 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2713 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2714 continue; /* no hyperthreading */ 2715 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2716 continue; /* no fast system call */ 2717 } 2718 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2719 WRITE_TO_BUF(_feature_edx_id[fi]); 2720 } 2721 } 2722 2723 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2724 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2725 WRITE_TO_BUF(_feature_ecx_id[fi]); 2726 } 2727 } 2728 2729 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2730 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2731 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2732 } 2733 } 2734 2735 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2736 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2737 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2738 } 2739 } 2740 2741 if (supports_tscinv_bit()) { 2742 WRITE_TO_BUF("Invariant TSC"); 2743 } 2744 2745 return written; 2746 } 2747 2748 /** 2749 * Write a detailed description of the cpu to a given buffer, including 2750 * feature set. 
2751 */ 2752 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2753 assert(buf != nullptr, "buffer is null!"); 2754 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2755 2756 static const char* unknown = "<unknown>"; 2757 char vendor_id[VENDOR_LENGTH]; 2758 const char* family = nullptr; 2759 const char* model = nullptr; 2760 const char* brand = nullptr; 2761 int outputLen = 0; 2762 2763 family = cpu_family_description(); 2764 if (family == nullptr) { 2765 family = unknown; 2766 } 2767 2768 model = cpu_model_description(); 2769 if (model == nullptr) { 2770 model = unknown; 2771 } 2772 2773 brand = cpu_brand_string(); 2774 2775 if (brand == nullptr) { 2776 brand = cpu_brand(); 2777 if (brand == nullptr) { 2778 brand = unknown; 2779 } 2780 } 2781 2782 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2783 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2784 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2785 vendor_id[VENDOR_LENGTH-1] = '\0'; 2786 2787 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2788 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2789 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2790 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2791 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2792 "Supports: ", 2793 brand, 2794 vendor_id, 2795 family, 2796 extended_cpu_family(), 2797 model, 2798 extended_cpu_model(), 2799 cpu_stepping(), 2800 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2801 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2802 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2803 _cpuid_info.std_cpuid1_eax.value, 2804 _cpuid_info.std_cpuid1_ebx.value, 2805 _cpuid_info.std_cpuid1_ecx.value, 2806 _cpuid_info.std_cpuid1_edx.value, 2807 _cpuid_info.ext_cpuid1_eax, 2808 _cpuid_info.ext_cpuid1_ebx, 2809 _cpuid_info.ext_cpuid1_ecx, 2810 _cpuid_info.ext_cpuid1_edx); 2811 2812 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2813 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2814 return OS_ERR; 2815 } 2816 2817 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2818 2819 return OS_OK; 2820 } 2821 2822 2823 // Fill in Abstract_VM_Version statics 2824 void VM_Version::initialize_cpu_information() { 2825 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2826 assert(!_initialized, "shouldn't be initialized yet"); 2827 resolve_cpu_information_details(); 2828 2829 // initialize cpu_name and cpu_desc 2830 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2831 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2832 _initialized = true; 2833 } 2834 2835 /** 2836 * For information about extracting the frequency from the cpu brand string, please see: 2837 * 2838 * Intel Processor Identification and the CPUID Instruction 2839 * Application Note 485 2840 * May 2012 2841 * 2842 * The return value is the frequency in Hz. 2843 */ 2844 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2845 const char* const brand_string = cpu_brand_string(); 2846 if (brand_string == nullptr) { 2847 return 0; 2848 } 2849 const int64_t MEGA = 1000000; 2850 int64_t multiplier = 0; 2851 int64_t frequency = 0; 2852 uint8_t idx = 0; 2853 // The brand string buffer is at most 48 bytes. 2854 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 
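// Worked example (hypothetical brand string ending in "@ 3.20GHz"):
// the scan below stops with brand_string[idx] == 'G', so multiplier
// becomes 10^9; since brand_string[idx-3] == '.', the "x.xx" branch
// computes 3*10^9 + 2*10^8 + 0*10^7 == 3200000000 Hz.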
2855 for (; idx < 48-2; ++idx) { 2856 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2857 // Search brand string for "yHz" where y is M, G, or T. 2858 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2859 if (brand_string[idx] == 'M') { 2860 multiplier = MEGA; 2861 } else if (brand_string[idx] == 'G') { 2862 multiplier = MEGA * 1000; 2863 } else if (brand_string[idx] == 'T') { 2864 multiplier = MEGA * MEGA; 2865 } 2866 break; 2867 } 2868 } 2869 if (multiplier > 0) { 2870 // Compute frequency (in Hz) from brand string. 2871 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2872 frequency = (brand_string[idx-4] - '0') * multiplier; 2873 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2874 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2875 } else { // format is "xxxx" 2876 frequency = (brand_string[idx-4] - '0') * 1000; 2877 frequency += (brand_string[idx-3] - '0') * 100; 2878 frequency += (brand_string[idx-2] - '0') * 10; 2879 frequency += (brand_string[idx-1] - '0'); 2880 frequency *= multiplier; 2881 } 2882 } 2883 return frequency; 2884 } 2885 2886 2887 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2888 if (_max_qualified_cpu_frequency == 0) { 2889 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2890 } 2891 return _max_qualified_cpu_frequency; 2892 } 2893 2894 uint64_t VM_Version::feature_flags() { 2895 uint64_t result = 0; 2896 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 2897 result |= CPU_CX8; 2898 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 2899 result |= CPU_CMOV; 2900 if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0) 2901 result |= CPU_FLUSH; 2902 #ifdef _LP64 2903 // clflush should always be available on x86_64 2904 // if not we are in real trouble because we rely on it 2905 // to flush the code cache. 2906 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2907 #endif 2908 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2909 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) 2910 result |= CPU_FXSR; 2911 // HT flag is set for multi-core processors also. 
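// (The raw CPUID HTT bit is therefore not a reliable SMT indicator:
// multi-core parts without SMT set it as well. CPU_HT is instead derived
// from the topology-based threads_per_core() count below.)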
2912 if (threads_per_core() > 1) 2913 result |= CPU_HT; 2914 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2915 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) 2916 result |= CPU_MMX; 2917 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 2918 result |= CPU_SSE; 2919 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 2920 result |= CPU_SSE2; 2921 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 2922 result |= CPU_SSE3; 2923 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 2924 result |= CPU_SSSE3; 2925 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 2926 result |= CPU_SSE4_1; 2927 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 2928 result |= CPU_SSE4_2; 2929 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 2930 result |= CPU_POPCNT; 2931 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && 2932 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && 2933 _cpuid_info.xem_xcr0_eax.bits.sse != 0 && 2934 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { 2935 result |= CPU_AVX; 2936 result |= CPU_VZEROUPPER; 2937 if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0) 2938 result |= CPU_F16C; 2939 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) 2940 result |= CPU_AVX2; 2941 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 && 2942 _cpuid_info.xem_xcr0_eax.bits.opmask != 0 && 2943 _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 && 2944 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) { 2945 result |= CPU_AVX512F; 2946 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0) 2947 result |= CPU_AVX512CD; 2948 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0) 2949 result |= CPU_AVX512DQ; 2950 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0) 2951 result |= CPU_AVX512_IFMA; 2952 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0) 2953 result |= CPU_AVX512PF; 2954 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0) 2955 result |= CPU_AVX512ER; 2956 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0) 2957 result |= CPU_AVX512BW; 2958 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0) 2959 result |= CPU_AVX512VL; 2960 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2961 result |= CPU_AVX512_VPOPCNTDQ; 2962 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2963 result |= CPU_AVX512_VPCLMULQDQ; 2964 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0) 2965 result |= CPU_AVX512_VAES; 2966 if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0) 2967 result |= CPU_GFNI; 2968 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0) 2969 result |= CPU_AVX512_VNNI; 2970 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2971 result |= CPU_AVX512_BITALG; 2972 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2973 result |= CPU_AVX512_VBMI; 2974 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2975 result |= CPU_AVX512_VBMI2; 2976 } 2977 } 2978 if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0) 2979 result |= CPU_HV; 2980 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) 2981 result |= CPU_BMI1; 2982 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) 2983 result |= CPU_TSC; 2984 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) 2985 result |= CPU_TSCINV_BIT; 2986 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) 2987 result |= CPU_AES; 2988 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0) 2989 result |= CPU_ERMS; 2990 if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 2991 result |= CPU_FSRM; 2992 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0) 2993 result |= CPU_CLMUL; 2994 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0) 2995 result |= CPU_RTM; 2996 if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0) 2997 result |= CPU_ADX; 2998 if 
(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) 2999 result |= CPU_BMI2; 3000 if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0) 3001 result |= CPU_SHA; 3002 if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0) 3003 result |= CPU_FMA; 3004 if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0) 3005 result |= CPU_FLUSHOPT; 3006 if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0) 3007 result |= CPU_RDTSCP; 3008 if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0) 3009 result |= CPU_RDPID; 3010 3011 // AMD|Hygon features. 3012 if (is_amd_family()) { 3013 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || 3014 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) 3015 result |= CPU_3DNOW_PREFETCH; 3016 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) 3017 result |= CPU_LZCNT; 3018 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 3019 result |= CPU_SSE4A; 3020 } 3021 3022 // Intel features. 3023 if (is_intel()) { 3024 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3025 result |= CPU_LZCNT; 3026 } 3027 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3028 result |= CPU_3DNOW_PREFETCH; 3029 } 3030 if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) { 3031 result |= CPU_CLWB; 3032 } 3033 if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0) 3034 result |= CPU_SERIALIZE; 3035 } 3036 3037 // ZX features. 3038 if (is_zx()) { 3039 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3040 result |= CPU_LZCNT; 3041 } 3042 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3043 result |= CPU_3DNOW_PREFETCH; 3044 } 3045 } 3046 3047 // Protection key features. 3048 if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) { 3049 result |= CPU_PKU; 3050 } 3051 if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) { 3052 result |= CPU_OSPKE; 3053 } 3054 3055 // Control flow enforcement (CET) features. 3056 if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) { 3057 result |= CPU_CET_SS; 3058 } 3059 if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) { 3060 result |= CPU_CET_IBT; 3061 } 3062 3063 // Composite features. 3064 if (supports_tscinv_bit() && 3065 ((is_amd_family() && !is_amd_Barcelona()) || 3066 is_intel_tsc_synched_at_init())) { 3067 result |= CPU_TSCINV; 3068 } 3069 3070 return result; 3071 } 3072 3073 bool VM_Version::os_supports_avx_vectors() { 3074 bool retVal = false; 3075 int nreg = 2 LP64_ONLY(+2); 3076 if (supports_evex()) { 3077 // Verify that OS save/restore all bits of EVEX registers 3078 // during signal processing. 3079 retVal = true; 3080 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3081 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3082 retVal = false; 3083 break; 3084 } 3085 } 3086 } else if (supports_avx()) { 3087 // Verify that OS save/restore all bits of AVX registers 3088 // during signal processing. 3089 retVal = true; 3090 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3091 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3092 retVal = false; 3093 break; 3094 } 3095 } 3096 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3097 if (retVal == false) { 3098 // Verify that OS save/restore all bits of EVEX registers 3099 // during signal processing. 
3100 retVal = true; 3101 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3102 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3103 retVal = false; 3104 break; 3105 } 3106 } 3107 } 3108 } 3109 return retVal; 3110 } 3111 3112 uint VM_Version::cores_per_cpu() { 3113 uint result = 1; 3114 if (is_intel()) { 3115 bool supports_topology = supports_processor_topology(); 3116 if (supports_topology) { 3117 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3118 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3119 } 3120 if (!supports_topology || result == 0) { 3121 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3122 } 3123 } else if (is_amd_family()) { 3124 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3125 } else if (is_zx()) { 3126 bool supports_topology = supports_processor_topology(); 3127 if (supports_topology) { 3128 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3129 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3130 } 3131 if (!supports_topology || result == 0) { 3132 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3133 } 3134 } 3135 return result; 3136 } 3137 3138 uint VM_Version::threads_per_core() { 3139 uint result = 1; 3140 if (is_intel() && supports_processor_topology()) { 3141 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3142 } else if (is_zx() && supports_processor_topology()) { 3143 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3144 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3145 if (cpu_family() >= 0x17) { 3146 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3147 } else { 3148 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3149 cores_per_cpu(); 3150 } 3151 } 3152 return (result == 0 ? 1 : result); 3153 } 3154 3155 uint VM_Version::L1_line_size() { 3156 uint result = 0; 3157 if (is_intel()) { 3158 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3159 } else if (is_amd_family()) { 3160 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3161 } else if (is_zx()) { 3162 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3163 } 3164 if (result < 32) // not defined ? 3165 result = 32; // 32 bytes by default on x86 and other x64 3166 return result; 3167 } 3168 3169 bool VM_Version::is_intel_tsc_synched_at_init() { 3170 if (is_intel_family_core()) { 3171 uint32_t ext_model = extended_cpu_model(); 3172 if (ext_model == CPU_MODEL_NEHALEM_EP || 3173 ext_model == CPU_MODEL_WESTMERE_EP || 3174 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3175 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3176 // <= 2-socket invariant tsc support. EX versions are usually used 3177 // in > 2-socket systems and likely don't synchronize tscs at 3178 // initialization. 3179 // Code that uses tsc values must be prepared for them to arbitrarily 3180 // jump forward or backward. 3181 return true; 3182 } 3183 } 3184 return false; 3185 } 3186 3187 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3188 // Hardware prefetching (distance/size in bytes): 3189 // Pentium 3 - 64 / 32 3190 // Pentium 4 - 256 / 128 3191 // Athlon - 64 / 32 ???? 
3192 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3193 // Core - 128 / 64 3194 // 3195 // Software prefetching (distance in bytes / instruction with best score): 3196 // Pentium 3 - 128 / prefetchnta 3197 // Pentium 4 - 512 / prefetchnta 3198 // Athlon - 128 / prefetchnta 3199 // Opteron - 256 / prefetchnta 3200 // Core - 256 / prefetchnta 3201 // It will be used only when AllocatePrefetchStyle > 0 3202 3203 if (is_amd_family()) { // AMD | Hygon 3204 if (supports_sse2()) { 3205 return 256; // Opteron 3206 } else { 3207 return 128; // Athlon 3208 } 3209 } else { // Intel 3210 if (supports_sse3() && cpu_family() == 6) { 3211 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3212 return 192; 3213 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3214 #ifdef _LP64 3215 return 384; 3216 #else 3217 return 320; 3218 #endif 3219 } 3220 } 3221 if (supports_sse2()) { 3222 if (cpu_family() == 6) { 3223 return 256; // Pentium M, Core, Core2 3224 } else { 3225 return 512; // Pentium 4 3226 } 3227 } else { 3228 return 128; // Pentium 3 (and all other old CPUs) 3229 } 3230 } 3231 } 3232 3233 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) { 3234 assert(id != vmIntrinsics::_none, "must be a VM intrinsic"); 3235 switch (id) { 3236 case vmIntrinsics::_floatToFloat16: 3237 case vmIntrinsics::_float16ToFloat: 3238 if (!supports_float16()) { 3239 return false; 3240 } 3241 break; 3242 default: 3243 break; 3244 } 3245 return true; 3246 }
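// Illustrative usage (assumed caller pattern, not defined in this file):
// a compiler is expected to gate intrinsic expansion on this hook, e.g.
//   if (VM_Version::is_intrinsic_supported(vmIntrinsics::_float16ToFloat)) {
//     // emit the hardware float16 -> float conversion
//   }
// so the two Float16 conversion intrinsics above are only expanded when
// supports_float16() reports the required ISA support.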