1 /* 2 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. 8 * 9 * This code is distributed in the hope that it will be useful, but WITHOUT 10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 12 * version 2 for more details (a copy is included in the LICENSE file that 13 * accompanied this code). 14 * 15 * You should have received a copy of the GNU General Public License version 16 * 2 along with this work; if not, write to the Free Software Foundation, 17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 18 * 19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 20 * or visit www.oracle.com if you need additional information or have any 21 * questions. 22 * 23 */ 24 25 #include "precompiled.hpp" 26 #include "asm/macroAssembler.hpp" 27 #include "asm/macroAssembler.inline.hpp" 28 #include "code/codeBlob.hpp" 29 #include "compiler/compilerDefinitions.inline.hpp" 30 #include "jvm.h" 31 #include "logging/log.hpp" 32 #include "logging/logStream.hpp" 33 #include "memory/resourceArea.hpp" 34 #include "memory/universe.hpp" 35 #include "runtime/globals_extension.hpp" 36 #include "runtime/java.hpp" 37 #include "runtime/os.inline.hpp" 38 #include "runtime/stubCodeGenerator.hpp" 39 #include "runtime/vm_version.hpp" 40 #include "utilities/powerOfTwo.hpp" 41 #include "utilities/virtualizationSupport.hpp" 42 43 int VM_Version::_cpu; 44 int VM_Version::_model; 45 int VM_Version::_stepping; 46 bool VM_Version::_has_intel_jcc_erratum; 47 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; 48 49 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name, 50 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)}; 51 #undef DECLARE_CPU_FEATURE_FLAG 52 53 // Address of instruction which causes SEGV 54 address VM_Version::_cpuinfo_segv_addr = 0; 55 // Address of instruction after the one which causes SEGV 56 address VM_Version::_cpuinfo_cont_addr = 0; 57 58 static BufferBlob* stub_blob; 59 static const int stub_size = 2000; 60 61 extern "C" { 62 typedef void (*get_cpu_info_stub_t)(void*); 63 typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*); 64 } 65 static get_cpu_info_stub_t get_cpu_info_stub = NULL; 66 static detect_virt_stub_t detect_virt_stub = NULL; 67 68 #ifdef _LP64 69 70 bool VM_Version::supports_clflush() { 71 // clflush should always be available on x86_64 72 // if not we are in real trouble because we rely on it 73 // to flush the code cache. 74 // Unfortunately, Assembler::clflush is currently called as part 75 // of generation of the code cache flush routine. This happens 76 // under Universe::init before the processor features are set 77 // up. Assembler::flush calls this routine to check that clflush 78 // is allowed. So, we give the caller a free pass if Universe init 79 // is still in progress. 80 assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available"); 81 return true; 82 } 83 #endif 84 85 #define CPUID_STANDARD_FN 0x0 86 #define CPUID_STANDARD_FN_1 0x1 87 #define CPUID_STANDARD_FN_4 0x4 88 #define CPUID_STANDARD_FN_B 0xb 89 90 #define CPUID_EXTENDED_FN 0x80000000 91 #define CPUID_EXTENDED_FN_1 0x80000001 92 #define CPUID_EXTENDED_FN_2 0x80000002 93 #define CPUID_EXTENDED_FN_3 0x80000003 94 #define CPUID_EXTENDED_FN_4 0x80000004 95 #define CPUID_EXTENDED_FN_7 0x80000007 96 #define CPUID_EXTENDED_FN_8 0x80000008 97 98 class VM_Version_StubGenerator: public StubCodeGenerator { 99 public: 100 101 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} 102 103 address generate_get_cpu_info() { 104 // Flags to test CPU type. 105 const uint32_t HS_EFL_AC = 0x40000; 106 const uint32_t HS_EFL_ID = 0x200000; 107 // Values for when we don't have a CPUID instruction. 108 const int CPU_FAMILY_SHIFT = 8; 109 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 110 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 111 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2); 112 113 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4; 114 Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup; 115 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check; 116 117 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); 118 # define __ _masm-> 119 120 address start = __ pc(); 121 122 // 123 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info); 124 // 125 // LP64: rcx and rdx are first and second argument registers on windows 126 127 __ push(rbp); 128 #ifdef _LP64 129 __ mov(rbp, c_rarg0); // cpuid_info address 130 #else 131 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 132 #endif 133 __ push(rbx); 134 __ push(rsi); 135 __ pushf(); // preserve rbx, and flags 136 __ pop(rax); 137 __ push(rax); 138 __ mov(rcx, rax); 139 // 140 // if we are unable to change the AC flag, we have a 386 141 // 142 __ xorl(rax, HS_EFL_AC); 143 __ push(rax); 144 __ popf(); 145 __ pushf(); 146 __ pop(rax); 147 __ cmpptr(rax, rcx); 148 __ jccb(Assembler::notEqual, detect_486); 149 150 __ movl(rax, CPU_FAMILY_386); 151 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 152 __ jmp(done); 153 154 // 155 // If we are unable to change the ID flag, we have a 486 which does 156 // not support the "cpuid" instruction. 157 // 158 __ bind(detect_486); 159 __ mov(rax, rcx); 160 __ xorl(rax, HS_EFL_ID); 161 __ push(rax); 162 __ popf(); 163 __ pushf(); 164 __ pop(rax); 165 __ cmpptr(rcx, rax); 166 __ jccb(Assembler::notEqual, detect_586); 167 168 __ bind(cpu486); 169 __ movl(rax, CPU_FAMILY_486); 170 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax); 171 __ jmp(done); 172 173 // 174 // At this point, we have a chip which supports the "cpuid" instruction 175 // 176 __ bind(detect_586); 177 __ xorl(rax, rax); 178 __ cpuid(); 179 __ orl(rax, rax); 180 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 181 // value of at least 1, we give up and 182 // assume a 486 183 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 184 __ movl(Address(rsi, 0), rax); 185 __ movl(Address(rsi, 4), rbx); 186 __ movl(Address(rsi, 8), rcx); 187 __ movl(Address(rsi,12), rdx); 188 189 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported? 190 __ jccb(Assembler::belowEqual, std_cpuid4); 191 192 // 193 // cpuid(0xB) Processor Topology 194 // 195 __ movl(rax, 0xb); 196 __ xorl(rcx, rcx); // Threads level 197 __ cpuid(); 198 199 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset()))); 200 __ movl(Address(rsi, 0), rax); 201 __ movl(Address(rsi, 4), rbx); 202 __ movl(Address(rsi, 8), rcx); 203 __ movl(Address(rsi,12), rdx); 204 205 __ movl(rax, 0xb); 206 __ movl(rcx, 1); // Cores level 207 __ cpuid(); 208 __ push(rax); 209 __ andl(rax, 0x1f); // Determine if valid topology level 210 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 211 __ andl(rax, 0xffff); 212 __ pop(rax); 213 __ jccb(Assembler::equal, std_cpuid4); 214 215 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset()))); 216 __ movl(Address(rsi, 0), rax); 217 __ movl(Address(rsi, 4), rbx); 218 __ movl(Address(rsi, 8), rcx); 219 __ movl(Address(rsi,12), rdx); 220 221 __ movl(rax, 0xb); 222 __ movl(rcx, 2); // Packages level 223 __ cpuid(); 224 __ push(rax); 225 __ andl(rax, 0x1f); // Determine if valid topology level 226 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level 227 __ andl(rax, 0xffff); 228 __ pop(rax); 229 __ jccb(Assembler::equal, std_cpuid4); 230 231 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset()))); 232 __ movl(Address(rsi, 0), rax); 233 __ movl(Address(rsi, 4), rbx); 234 __ movl(Address(rsi, 8), rcx); 235 __ movl(Address(rsi,12), rdx); 236 237 // 238 // cpuid(0x4) Deterministic cache params 239 // 240 __ bind(std_cpuid4); 241 __ movl(rax, 4); 242 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported? 243 __ jccb(Assembler::greater, std_cpuid1); 244 245 __ xorl(rcx, rcx); // L1 cache 246 __ cpuid(); 247 __ push(rax); 248 __ andl(rax, 0x1f); // Determine if valid cache parameters used 249 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache 250 __ pop(rax); 251 __ jccb(Assembler::equal, std_cpuid1); 252 253 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset()))); 254 __ movl(Address(rsi, 0), rax); 255 __ movl(Address(rsi, 4), rbx); 256 __ movl(Address(rsi, 8), rcx); 257 __ movl(Address(rsi,12), rdx); 258 259 // 260 // Standard cpuid(0x1) 261 // 262 __ bind(std_cpuid1); 263 __ movl(rax, 1); 264 __ cpuid(); 265 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 266 __ movl(Address(rsi, 0), rax); 267 __ movl(Address(rsi, 4), rbx); 268 __ movl(Address(rsi, 8), rcx); 269 __ movl(Address(rsi,12), rdx); 270 271 // 272 // Check if OS has enabled XGETBV instruction to access XCR0 273 // (OSXSAVE feature flag) and CPU supports AVX 274 // 275 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 276 __ cmpl(rcx, 0x18000000); 277 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported 278 279 // 280 // XCR0, XFEATURE_ENABLED_MASK register 281 // 282 __ xorl(rcx, rcx); // zero for XCR0 register 283 __ xgetbv(); 284 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); 285 __ movl(Address(rsi, 0), rax); 286 __ movl(Address(rsi, 4), rdx); 287 288 // 289 // cpuid(0x7) Structured Extended Features 290 // 291 __ bind(sef_cpuid); 292 __ movl(rax, 7); 293 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported? 294 __ jccb(Assembler::greater, ext_cpuid); 295 296 __ xorl(rcx, rcx); 297 __ cpuid(); 298 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 299 __ movl(Address(rsi, 0), rax); 300 __ movl(Address(rsi, 4), rbx); 301 __ movl(Address(rsi, 8), rcx); 302 __ movl(Address(rsi, 12), rdx); 303 304 // 305 // Extended cpuid(0x80000000) 306 // 307 __ bind(ext_cpuid); 308 __ movl(rax, 0x80000000); 309 __ cpuid(); 310 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported? 311 __ jcc(Assembler::belowEqual, done); 312 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported? 313 __ jcc(Assembler::belowEqual, ext_cpuid1); 314 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported? 315 __ jccb(Assembler::belowEqual, ext_cpuid5); 316 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported? 317 __ jccb(Assembler::belowEqual, ext_cpuid7); 318 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported? 319 __ jccb(Assembler::belowEqual, ext_cpuid8); 320 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported? 321 __ jccb(Assembler::below, ext_cpuid8); 322 // 323 // Extended cpuid(0x8000001E) 324 // 325 __ movl(rax, 0x8000001E); 326 __ cpuid(); 327 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset()))); 328 __ movl(Address(rsi, 0), rax); 329 __ movl(Address(rsi, 4), rbx); 330 __ movl(Address(rsi, 8), rcx); 331 __ movl(Address(rsi,12), rdx); 332 333 // 334 // Extended cpuid(0x80000008) 335 // 336 __ bind(ext_cpuid8); 337 __ movl(rax, 0x80000008); 338 __ cpuid(); 339 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset()))); 340 __ movl(Address(rsi, 0), rax); 341 __ movl(Address(rsi, 4), rbx); 342 __ movl(Address(rsi, 8), rcx); 343 __ movl(Address(rsi,12), rdx); 344 345 // 346 // Extended cpuid(0x80000007) 347 // 348 __ bind(ext_cpuid7); 349 __ movl(rax, 0x80000007); 350 __ cpuid(); 351 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset()))); 352 __ movl(Address(rsi, 0), rax); 353 __ movl(Address(rsi, 4), rbx); 354 __ movl(Address(rsi, 8), rcx); 355 __ movl(Address(rsi,12), rdx); 356 357 // 358 // Extended cpuid(0x80000005) 359 // 360 __ bind(ext_cpuid5); 361 __ movl(rax, 0x80000005); 362 __ cpuid(); 363 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset()))); 364 __ movl(Address(rsi, 0), rax); 365 __ movl(Address(rsi, 4), rbx); 366 __ movl(Address(rsi, 8), rcx); 367 __ movl(Address(rsi,12), rdx); 368 369 // 370 // Extended cpuid(0x80000001) 371 // 372 __ bind(ext_cpuid1); 373 __ movl(rax, 0x80000001); 374 __ cpuid(); 375 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset()))); 376 __ movl(Address(rsi, 0), rax); 377 __ movl(Address(rsi, 4), rbx); 378 __ movl(Address(rsi, 8), rcx); 379 __ movl(Address(rsi,12), rdx); 380 381 // 382 // Check if OS has enabled XGETBV instruction to access XCR0 383 // (OSXSAVE feature flag) and CPU supports AVX 384 // 385 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 386 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx 387 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx 388 __ cmpl(rcx, 0x18000000); 389 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported 390 391 __ movl(rax, 0x6); 392 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 393 __ cmpl(rax, 0x6); 394 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported 395 396 // we need to bridge farther than imm8, so we use this island as a thunk 397 __ bind(done); 398 __ jmp(wrapup); 399 400 __ bind(start_simd_check); 401 // 402 // Some OSs have a bug when upper 128/256bits of YMM/ZMM 403 // registers are not restored after a signal processing. 404 // Generate SEGV here (reference through NULL) 405 // and check upper YMM/ZMM bits after it. 406 // 407 int saved_useavx = UseAVX; 408 int saved_usesse = UseSSE; 409 410 // If UseAVX is uninitialized or is set by the user to include EVEX 411 if (use_evex) { 412 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 413 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 414 __ movl(rax, 0x10000); 415 __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm 416 __ cmpl(rax, 0x10000); 417 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 418 // check _cpuid_info.xem_xcr0_eax.bits.opmask 419 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 420 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 421 __ movl(rax, 0xE0); 422 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 423 __ cmpl(rax, 0xE0); 424 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported 425 426 if (FLAG_IS_DEFAULT(UseAVX)) { 427 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 428 __ movl(rax, Address(rsi, 0)); 429 __ cmpl(rax, 0x50654); // If it is Skylake 430 __ jcc(Assembler::equal, legacy_setup); 431 } 432 // EVEX setup: run in lowest evex mode 433 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 434 UseAVX = 3; 435 UseSSE = 2; 436 #ifdef _WINDOWS 437 // xmm5-xmm15 are not preserved by caller on windows 438 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx 439 __ subptr(rsp, 64); 440 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit); 441 #ifdef _LP64 442 __ subptr(rsp, 64); 443 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit); 444 __ subptr(rsp, 64); 445 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit); 446 #endif // _LP64 447 #endif // _WINDOWS 448 449 // load value into all 64 bytes of zmm7 register 450 __ movl(rcx, VM_Version::ymm_test_value()); 451 __ movdl(xmm0, rcx); 452 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit); 453 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit); 454 #ifdef _LP64 455 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit); 456 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit); 457 #endif 458 VM_Version::clean_cpuFeatures(); 459 __ jmp(save_restore_except); 460 } 461 462 __ bind(legacy_setup); 463 // AVX setup 464 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 465 UseAVX = 1; 466 UseSSE = 2; 467 #ifdef _WINDOWS 468 __ subptr(rsp, 32); 469 __ vmovdqu(Address(rsp, 0), xmm7); 470 #ifdef _LP64 471 __ subptr(rsp, 32); 472 __ vmovdqu(Address(rsp, 0), xmm8); 473 __ subptr(rsp, 32); 474 __ vmovdqu(Address(rsp, 0), xmm15); 475 #endif // _LP64 476 #endif // _WINDOWS 477 478 // load value into all 32 bytes of ymm7 register 479 __ movl(rcx, VM_Version::ymm_test_value()); 480 481 __ movdl(xmm0, rcx); 482 __ pshufd(xmm0, xmm0, 0x00); 483 __ vinsertf128_high(xmm0, xmm0); 484 __ vmovdqu(xmm7, xmm0); 485 #ifdef _LP64 486 __ vmovdqu(xmm8, xmm0); 487 __ vmovdqu(xmm15, xmm0); 488 #endif 489 VM_Version::clean_cpuFeatures(); 490 491 __ bind(save_restore_except); 492 __ xorl(rsi, rsi); 493 VM_Version::set_cpuinfo_segv_addr(__ pc()); 494 // Generate SEGV 495 __ movl(rax, Address(rsi, 0)); 496 497 VM_Version::set_cpuinfo_cont_addr(__ pc()); 498 // Returns here after signal. Save xmm0 to check it later. 499 500 // If UseAVX is uninitialized or is set by the user to include EVEX 501 if (use_evex) { 502 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f 503 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset()))); 504 __ movl(rax, 0x10000); 505 __ andl(rax, Address(rsi, 4)); 506 __ cmpl(rax, 0x10000); 507 __ jcc(Assembler::notEqual, legacy_save_restore); 508 // check _cpuid_info.xem_xcr0_eax.bits.opmask 509 // check _cpuid_info.xem_xcr0_eax.bits.zmm512 510 // check _cpuid_info.xem_xcr0_eax.bits.zmm32 511 __ movl(rax, 0xE0); 512 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm 513 __ cmpl(rax, 0xE0); 514 __ jcc(Assembler::notEqual, legacy_save_restore); 515 516 if (FLAG_IS_DEFAULT(UseAVX)) { 517 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 518 __ movl(rax, Address(rsi, 0)); 519 __ cmpl(rax, 0x50654); // If it is Skylake 520 __ jcc(Assembler::equal, legacy_save_restore); 521 } 522 // EVEX check: run in lowest evex mode 523 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts 524 UseAVX = 3; 525 UseSSE = 2; 526 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset()))); 527 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit); 528 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit); 529 #ifdef _LP64 530 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit); 531 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit); 532 #endif 533 534 #ifdef _WINDOWS 535 #ifdef _LP64 536 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit); 537 __ addptr(rsp, 64); 538 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit); 539 __ addptr(rsp, 64); 540 #endif // _LP64 541 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit); 542 __ addptr(rsp, 64); 543 #endif // _WINDOWS 544 generate_vzeroupper(wrapup); 545 VM_Version::clean_cpuFeatures(); 546 UseAVX = saved_useavx; 547 UseSSE = saved_usesse; 548 __ jmp(wrapup); 549 } 550 551 __ bind(legacy_save_restore); 552 // AVX check 553 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts 554 UseAVX = 1; 555 UseSSE = 2; 556 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset()))); 557 __ vmovdqu(Address(rsi, 0), xmm0); 558 __ vmovdqu(Address(rsi, 32), xmm7); 559 #ifdef _LP64 560 __ vmovdqu(Address(rsi, 64), xmm8); 561 __ vmovdqu(Address(rsi, 96), xmm15); 562 #endif 563 564 #ifdef _WINDOWS 565 #ifdef _LP64 566 __ vmovdqu(xmm15, Address(rsp, 0)); 567 __ addptr(rsp, 32); 568 __ vmovdqu(xmm8, Address(rsp, 0)); 569 __ addptr(rsp, 32); 570 #endif // _LP64 571 __ vmovdqu(xmm7, Address(rsp, 0)); 572 __ addptr(rsp, 32); 573 #endif // _WINDOWS 574 generate_vzeroupper(wrapup); 575 VM_Version::clean_cpuFeatures(); 576 UseAVX = saved_useavx; 577 UseSSE = saved_usesse; 578 579 __ bind(wrapup); 580 __ popf(); 581 __ pop(rsi); 582 __ pop(rbx); 583 __ pop(rbp); 584 __ ret(0); 585 586 # undef __ 587 588 return start; 589 }; 590 void generate_vzeroupper(Label& L_wrapup) { 591 # define __ _masm-> 592 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); 593 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG' 594 __ jcc(Assembler::notEqual, L_wrapup); 595 __ movl(rcx, 0x0FFF0FF0); 596 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset()))); 597 __ andl(rcx, Address(rsi, 0)); 598 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200 599 __ jcc(Assembler::equal, L_wrapup); 600 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi 601 __ jcc(Assembler::equal, L_wrapup); 602 // vzeroupper() will use a pre-computed instruction sequence that we 603 // can't compute until after we've determined CPU capabilities. Use 604 // uncached variant here directly to be able to bootstrap correctly 605 __ vzeroupper_uncached(); 606 # undef __ 607 } 608 address generate_detect_virt() { 609 StubCodeMark mark(this, "VM_Version", "detect_virt_stub"); 610 # define __ _masm-> 611 612 address start = __ pc(); 613 614 // Evacuate callee-saved registers 615 __ push(rbp); 616 __ push(rbx); 617 __ push(rsi); // for Windows 618 619 #ifdef _LP64 620 __ mov(rax, c_rarg0); // CPUID leaf 621 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx) 622 #else 623 __ movptr(rax, Address(rsp, 16)); // CPUID leaf 624 __ movptr(rsi, Address(rsp, 20)); // register array address 625 #endif 626 627 __ cpuid(); 628 629 // Store result to register array 630 __ movl(Address(rsi, 0), rax); 631 __ movl(Address(rsi, 4), rbx); 632 __ movl(Address(rsi, 8), rcx); 633 __ movl(Address(rsi, 12), rdx); 634 635 // Epilogue 636 __ pop(rsi); 637 __ pop(rbx); 638 __ pop(rbp); 639 __ ret(0); 640 641 # undef __ 642 643 return start; 644 }; 645 646 647 address generate_getCPUIDBrandString(void) { 648 // Flags to test CPU type. 649 const uint32_t HS_EFL_AC = 0x40000; 650 const uint32_t HS_EFL_ID = 0x200000; 651 // Values for when we don't have a CPUID instruction. 652 const int CPU_FAMILY_SHIFT = 8; 653 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT); 654 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT); 655 656 Label detect_486, cpu486, detect_586, done, ext_cpuid; 657 658 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub"); 659 # define __ _masm-> 660 661 address start = __ pc(); 662 663 // 664 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info); 665 // 666 // LP64: rcx and rdx are first and second argument registers on windows 667 668 __ push(rbp); 669 #ifdef _LP64 670 __ mov(rbp, c_rarg0); // cpuid_info address 671 #else 672 __ movptr(rbp, Address(rsp, 8)); // cpuid_info address 673 #endif 674 __ push(rbx); 675 __ push(rsi); 676 __ pushf(); // preserve rbx, and flags 677 __ pop(rax); 678 __ push(rax); 679 __ mov(rcx, rax); 680 // 681 // if we are unable to change the AC flag, we have a 386 682 // 683 __ xorl(rax, HS_EFL_AC); 684 __ push(rax); 685 __ popf(); 686 __ pushf(); 687 __ pop(rax); 688 __ cmpptr(rax, rcx); 689 __ jccb(Assembler::notEqual, detect_486); 690 691 __ movl(rax, CPU_FAMILY_386); 692 __ jmp(done); 693 694 // 695 // If we are unable to change the ID flag, we have a 486 which does 696 // not support the "cpuid" instruction. 697 // 698 __ bind(detect_486); 699 __ mov(rax, rcx); 700 __ xorl(rax, HS_EFL_ID); 701 __ push(rax); 702 __ popf(); 703 __ pushf(); 704 __ pop(rax); 705 __ cmpptr(rcx, rax); 706 __ jccb(Assembler::notEqual, detect_586); 707 708 __ bind(cpu486); 709 __ movl(rax, CPU_FAMILY_486); 710 __ jmp(done); 711 712 // 713 // At this point, we have a chip which supports the "cpuid" instruction 714 // 715 __ bind(detect_586); 716 __ xorl(rax, rax); 717 __ cpuid(); 718 __ orl(rax, rax); 719 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input 720 // value of at least 1, we give up and 721 // assume a 486 722 723 // 724 // Extended cpuid(0x80000000) for processor brand string detection 725 // 726 __ bind(ext_cpuid); 727 __ movl(rax, CPUID_EXTENDED_FN); 728 __ cpuid(); 729 __ cmpl(rax, CPUID_EXTENDED_FN_4); 730 __ jcc(Assembler::below, done); 731 732 // 733 // Extended cpuid(0x80000002) // first 16 bytes in brand string 734 // 735 __ movl(rax, CPUID_EXTENDED_FN_2); 736 __ cpuid(); 737 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset()))); 738 __ movl(Address(rsi, 0), rax); 739 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset()))); 740 __ movl(Address(rsi, 0), rbx); 741 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset()))); 742 __ movl(Address(rsi, 0), rcx); 743 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset()))); 744 __ movl(Address(rsi,0), rdx); 745 746 // 747 // Extended cpuid(0x80000003) // next 16 bytes in brand string 748 // 749 __ movl(rax, CPUID_EXTENDED_FN_3); 750 __ cpuid(); 751 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset()))); 752 __ movl(Address(rsi, 0), rax); 753 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset()))); 754 __ movl(Address(rsi, 0), rbx); 755 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset()))); 756 __ movl(Address(rsi, 0), rcx); 757 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset()))); 758 __ movl(Address(rsi,0), rdx); 759 760 // 761 // Extended cpuid(0x80000004) // last 16 bytes in brand string 762 // 763 __ movl(rax, CPUID_EXTENDED_FN_4); 764 __ cpuid(); 765 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset()))); 766 __ movl(Address(rsi, 0), rax); 767 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset()))); 768 __ movl(Address(rsi, 0), rbx); 769 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset()))); 770 __ movl(Address(rsi, 0), rcx); 771 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset()))); 772 __ movl(Address(rsi,0), rdx); 773 774 // 775 // return 776 // 777 __ bind(done); 778 __ popf(); 779 __ pop(rsi); 780 __ pop(rbx); 781 __ pop(rbp); 782 __ ret(0); 783 784 # undef __ 785 786 return start; 787 }; 788 }; 789 790 void VM_Version::get_processor_features() { 791 792 _cpu = 4; // 486 by default 793 _model = 0; 794 _stepping = 0; 795 _features = 0; 796 _logical_processors_per_package = 1; 797 // i486 internal cache is both I&D and has a 16-byte line size 798 _L1_data_cache_line_size = 16; 799 800 // Get raw processor info 801 802 get_cpu_info_stub(&_cpuid_info); 803 804 assert_is_initialized(); 805 _cpu = extended_cpu_family(); 806 _model = extended_cpu_model(); 807 _stepping = cpu_stepping(); 808 809 if (cpu_family() > 4) { // it supports CPUID 810 _features = feature_flags(); 811 // Logical processors are only available on P4s and above, 812 // and only if hyperthreading is available. 813 _logical_processors_per_package = logical_processor_count(); 814 _L1_data_cache_line_size = L1_line_size(); 815 } 816 817 _supports_cx8 = supports_cmpxchg8(); 818 // xchg and xadd instructions 819 _supports_atomic_getset4 = true; 820 _supports_atomic_getadd4 = true; 821 LP64_ONLY(_supports_atomic_getset8 = true); 822 LP64_ONLY(_supports_atomic_getadd8 = true); 823 824 #ifdef _LP64 825 // OS should support SSE for x64 and hardware should support at least SSE2. 826 if (!VM_Version::supports_sse2()) { 827 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported"); 828 } 829 // in 64 bit the use of SSE2 is the minimum 830 if (UseSSE < 2) UseSSE = 2; 831 #endif 832 833 #ifdef AMD64 834 // flush_icache_stub have to be generated first. 835 // That is why Icache line size is hard coded in ICache class, 836 // see icache_x86.hpp. It is also the reason why we can't use 837 // clflush instruction in 32-bit VM since it could be running 838 // on CPU which does not support it. 839 // 840 // The only thing we can do is to verify that flushed 841 // ICache::line_size has correct value. 842 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported"); 843 // clflush_size is size in quadwords (8 bytes). 844 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported"); 845 #endif 846 847 #ifdef _LP64 848 // assigning this field effectively enables Unsafe.writebackMemory() 849 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero 850 // that is only implemented on x86_64 and only if the OS plays ball 851 if (os::supports_map_sync()) { 852 // publish data cache line flush size to generic field, otherwise 853 // let if default to zero thereby disabling writeback 854 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8; 855 } 856 #endif 857 858 if (UseSSE < 4) { 859 _features &= ~CPU_SSE4_1; 860 _features &= ~CPU_SSE4_2; 861 } 862 863 if (UseSSE < 3) { 864 _features &= ~CPU_SSE3; 865 _features &= ~CPU_SSSE3; 866 _features &= ~CPU_SSE4A; 867 } 868 869 if (UseSSE < 2) 870 _features &= ~CPU_SSE2; 871 872 if (UseSSE < 1) 873 _features &= ~CPU_SSE; 874 875 //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0. 876 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) { 877 UseAVX = 0; 878 } 879 880 // UseSSE is set to the smaller of what hardware supports and what 881 // the command line requires. I.e., you cannot set UseSSE to 2 on 882 // older Pentiums which do not support it. 883 int use_sse_limit = 0; 884 if (UseSSE > 0) { 885 if (UseSSE > 3 && supports_sse4_1()) { 886 use_sse_limit = 4; 887 } else if (UseSSE > 2 && supports_sse3()) { 888 use_sse_limit = 3; 889 } else if (UseSSE > 1 && supports_sse2()) { 890 use_sse_limit = 2; 891 } else if (UseSSE > 0 && supports_sse()) { 892 use_sse_limit = 1; 893 } else { 894 use_sse_limit = 0; 895 } 896 } 897 if (FLAG_IS_DEFAULT(UseSSE)) { 898 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 899 } else if (UseSSE > use_sse_limit) { 900 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit); 901 FLAG_SET_DEFAULT(UseSSE, use_sse_limit); 902 } 903 904 // first try initial setting and detect what we can support 905 int use_avx_limit = 0; 906 if (UseAVX > 0) { 907 if (UseSSE < 4) { 908 // Don't use AVX if SSE is unavailable or has been disabled. 909 use_avx_limit = 0; 910 } else if (UseAVX > 2 && supports_evex()) { 911 use_avx_limit = 3; 912 } else if (UseAVX > 1 && supports_avx2()) { 913 use_avx_limit = 2; 914 } else if (UseAVX > 0 && supports_avx()) { 915 use_avx_limit = 1; 916 } else { 917 use_avx_limit = 0; 918 } 919 } 920 if (FLAG_IS_DEFAULT(UseAVX)) { 921 // Don't use AVX-512 on older Skylakes unless explicitly requested. 922 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) { 923 FLAG_SET_DEFAULT(UseAVX, 2); 924 } else { 925 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 926 } 927 } 928 if (UseAVX > use_avx_limit) { 929 if (UseSSE < 4) { 930 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX); 931 } else { 932 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit); 933 } 934 FLAG_SET_DEFAULT(UseAVX, use_avx_limit); 935 } 936 937 if (UseAVX < 3) { 938 _features &= ~CPU_AVX512F; 939 _features &= ~CPU_AVX512DQ; 940 _features &= ~CPU_AVX512CD; 941 _features &= ~CPU_AVX512BW; 942 _features &= ~CPU_AVX512VL; 943 _features &= ~CPU_AVX512_VPOPCNTDQ; 944 _features &= ~CPU_AVX512_VPCLMULQDQ; 945 _features &= ~CPU_AVX512_VAES; 946 _features &= ~CPU_AVX512_VNNI; 947 _features &= ~CPU_AVX512_VBMI; 948 _features &= ~CPU_AVX512_VBMI2; 949 _features &= ~CPU_AVX512_BITALG; 950 _features &= ~CPU_AVX512_IFMA; 951 } 952 953 if (UseAVX < 2) 954 _features &= ~CPU_AVX2; 955 956 if (UseAVX < 1) { 957 _features &= ~CPU_AVX; 958 _features &= ~CPU_VZEROUPPER; 959 _features &= ~CPU_F16C; 960 } 961 962 if (logical_processors_per_package() == 1) { 963 // HT processor could be installed on a system which doesn't support HT. 964 _features &= ~CPU_HT; 965 } 966 967 if (is_intel()) { // Intel cpus specific settings 968 if (is_knights_family()) { 969 _features &= ~CPU_VZEROUPPER; 970 _features &= ~CPU_AVX512BW; 971 _features &= ~CPU_AVX512VL; 972 _features &= ~CPU_AVX512DQ; 973 _features &= ~CPU_AVX512_VNNI; 974 _features &= ~CPU_AVX512_VAES; 975 _features &= ~CPU_AVX512_VPOPCNTDQ; 976 _features &= ~CPU_AVX512_VPCLMULQDQ; 977 _features &= ~CPU_AVX512_VBMI; 978 _features &= ~CPU_AVX512_VBMI2; 979 _features &= ~CPU_CLWB; 980 _features &= ~CPU_FLUSHOPT; 981 _features &= ~CPU_GFNI; 982 _features &= ~CPU_AVX512_BITALG; 983 _features &= ~CPU_AVX512_IFMA; 984 } 985 } 986 987 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) { 988 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum(); 989 } else { 990 _has_intel_jcc_erratum = IntelJccErratumMitigation; 991 } 992 993 char buf[1024]; 994 int res = jio_snprintf( 995 buf, sizeof(buf), 996 "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x", 997 cores_per_cpu(), threads_per_core(), 998 cpu_family(), _model, _stepping, os::cpu_microcode_revision()); 999 assert(res > 0, "not enough temporary space allocated"); 1000 insert_features_names(buf + res, sizeof(buf) - res, _features_names); 1001 1002 _features_string = os::strdup(buf); 1003 1004 // Use AES instructions if available. 1005 if (supports_aes()) { 1006 if (FLAG_IS_DEFAULT(UseAES)) { 1007 FLAG_SET_DEFAULT(UseAES, true); 1008 } 1009 if (!UseAES) { 1010 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1011 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled."); 1012 } 1013 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1014 } else { 1015 if (UseSSE > 2) { 1016 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1017 FLAG_SET_DEFAULT(UseAESIntrinsics, true); 1018 } 1019 } else { 1020 // The AES intrinsic stubs require AES instruction support (of course) 1021 // but also require sse3 mode or higher for instructions it use. 1022 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1023 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled."); 1024 } 1025 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1026 } 1027 1028 // --AES-CTR begins-- 1029 if (!UseAESIntrinsics) { 1030 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1031 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled."); 1032 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1033 } 1034 } else { 1035 if (supports_sse4_1()) { 1036 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1037 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true); 1038 } 1039 } else { 1040 // The AES-CTR intrinsic stubs require AES instruction support (of course) 1041 // but also require sse4.1 mode or higher for instructions it use. 1042 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1043 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled."); 1044 } 1045 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1046 } 1047 } 1048 // --AES-CTR ends-- 1049 } 1050 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) { 1051 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) { 1052 warning("AES instructions are not available on this CPU"); 1053 FLAG_SET_DEFAULT(UseAES, false); 1054 } 1055 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1056 warning("AES intrinsics are not available on this CPU"); 1057 FLAG_SET_DEFAULT(UseAESIntrinsics, false); 1058 } 1059 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) { 1060 warning("AES-CTR intrinsics are not available on this CPU"); 1061 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); 1062 } 1063 } 1064 1065 // Use CLMUL instructions if available. 1066 if (supports_clmul()) { 1067 if (FLAG_IS_DEFAULT(UseCLMUL)) { 1068 UseCLMUL = true; 1069 } 1070 } else if (UseCLMUL) { 1071 if (!FLAG_IS_DEFAULT(UseCLMUL)) 1072 warning("CLMUL instructions not available on this CPU (AVX may also be required)"); 1073 FLAG_SET_DEFAULT(UseCLMUL, false); 1074 } 1075 1076 if (UseCLMUL && (UseSSE > 2)) { 1077 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { 1078 UseCRC32Intrinsics = true; 1079 } 1080 } else if (UseCRC32Intrinsics) { 1081 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) 1082 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)"); 1083 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); 1084 } 1085 1086 #ifdef _LP64 1087 if (supports_avx2()) { 1088 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1089 UseAdler32Intrinsics = true; 1090 } 1091 } else if (UseAdler32Intrinsics) { 1092 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { 1093 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)"); 1094 } 1095 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1096 } 1097 #else 1098 if (UseAdler32Intrinsics) { 1099 warning("Adler32Intrinsics not available on this CPU."); 1100 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false); 1101 } 1102 #endif 1103 1104 if (supports_sse4_2() && supports_clmul()) { 1105 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1106 UseCRC32CIntrinsics = true; 1107 } 1108 } else if (UseCRC32CIntrinsics) { 1109 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { 1110 warning("CRC32C intrinsics are not available on this CPU"); 1111 } 1112 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); 1113 } 1114 1115 // GHASH/GCM intrinsics 1116 if (UseCLMUL && (UseSSE > 2)) { 1117 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { 1118 UseGHASHIntrinsics = true; 1119 } 1120 } else if (UseGHASHIntrinsics) { 1121 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) 1122 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU"); 1123 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); 1124 } 1125 1126 // ChaCha20 Intrinsics 1127 // As long as the system supports AVX as a baseline we can do a 1128 // SIMD-enabled block function. StubGenerator makes the determination 1129 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled 1130 // version. 1131 if (UseAVX >= 1) { 1132 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1133 UseChaCha20Intrinsics = true; 1134 } 1135 } else if (UseChaCha20Intrinsics) { 1136 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) { 1137 warning("ChaCha20 intrinsic requires AVX instructions"); 1138 } 1139 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false); 1140 } 1141 1142 // Base64 Intrinsics (Check the condition for which the intrinsic will be active) 1143 if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) { 1144 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) { 1145 UseBASE64Intrinsics = true; 1146 } 1147 } else if (UseBASE64Intrinsics) { 1148 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) 1149 warning("Base64 intrinsic requires EVEX instructions on this CPU"); 1150 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false); 1151 } 1152 1153 if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions 1154 if (FLAG_IS_DEFAULT(UseFMA)) { 1155 UseFMA = true; 1156 } 1157 } else if (UseFMA) { 1158 warning("FMA instructions are not available on this CPU"); 1159 FLAG_SET_DEFAULT(UseFMA, false); 1160 } 1161 1162 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) { 1163 UseMD5Intrinsics = true; 1164 } 1165 1166 if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) { 1167 if (FLAG_IS_DEFAULT(UseSHA)) { 1168 UseSHA = true; 1169 } 1170 } else if (UseSHA) { 1171 warning("SHA instructions are not available on this CPU"); 1172 FLAG_SET_DEFAULT(UseSHA, false); 1173 } 1174 1175 if (supports_sha() && supports_sse4_1() && UseSHA) { 1176 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { 1177 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); 1178 } 1179 } else if (UseSHA1Intrinsics) { 1180 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); 1181 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); 1182 } 1183 1184 if (supports_sse4_1() && UseSHA) { 1185 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { 1186 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); 1187 } 1188 } else if (UseSHA256Intrinsics) { 1189 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); 1190 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); 1191 } 1192 1193 #ifdef _LP64 1194 // These are only supported on 64-bit 1195 if (UseSHA && supports_avx2() && supports_bmi2()) { 1196 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) { 1197 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true); 1198 } 1199 } else 1200 #endif 1201 if (UseSHA512Intrinsics) { 1202 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); 1203 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); 1204 } 1205 1206 if (UseSHA3Intrinsics) { 1207 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU."); 1208 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false); 1209 } 1210 1211 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { 1212 FLAG_SET_DEFAULT(UseSHA, false); 1213 } 1214 1215 if (!supports_rtm() && UseRTMLocking) { 1216 vm_exit_during_initialization("RTM instructions are not available on this CPU"); 1217 } 1218 1219 #if INCLUDE_RTM_OPT 1220 if (UseRTMLocking) { 1221 if (!CompilerConfig::is_c2_enabled()) { 1222 // Only C2 does RTM locking optimization. 1223 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); 1224 } 1225 if (is_intel_family_core()) { 1226 if ((_model == CPU_MODEL_HASWELL_E3) || 1227 (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) || 1228 (_model == CPU_MODEL_BROADWELL && _stepping < 4)) { 1229 // currently a collision between SKL and HSW_E3 1230 if (!UnlockExperimentalVMOptions && UseAVX < 3) { 1231 vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this " 1232 "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag."); 1233 } else { 1234 warning("UseRTMLocking is only available as experimental option on this platform."); 1235 } 1236 } 1237 } 1238 if (!FLAG_IS_CMDLINE(UseRTMLocking)) { 1239 // RTM locking should be used only for applications with 1240 // high lock contention. For now we do not use it by default. 1241 vm_exit_during_initialization("UseRTMLocking flag should be only set on command line"); 1242 } 1243 } else { // !UseRTMLocking 1244 if (UseRTMForStackLocks) { 1245 if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) { 1246 warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off"); 1247 } 1248 FLAG_SET_DEFAULT(UseRTMForStackLocks, false); 1249 } 1250 if (UseRTMDeopt) { 1251 FLAG_SET_DEFAULT(UseRTMDeopt, false); 1252 } 1253 if (PrintPreciseRTMLockingStatistics) { 1254 FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false); 1255 } 1256 } 1257 #else 1258 if (UseRTMLocking) { 1259 // Only C2 does RTM locking optimization. 1260 vm_exit_during_initialization("RTM locking optimization is not supported in this VM"); 1261 } 1262 #endif 1263 1264 #ifdef COMPILER2 1265 if (UseFPUForSpilling) { 1266 if (UseSSE < 2) { 1267 // Only supported with SSE2+ 1268 FLAG_SET_DEFAULT(UseFPUForSpilling, false); 1269 } 1270 } 1271 #endif 1272 1273 #if COMPILER2_OR_JVMCI 1274 int max_vector_size = 0; 1275 if (UseSSE < 2) { 1276 // Vectors (in XMM) are only supported with SSE2+ 1277 // SSE is always 2 on x64. 1278 max_vector_size = 0; 1279 } else if (UseAVX == 0 || !os_supports_avx_vectors()) { 1280 // 16 byte vectors (in XMM) are supported with SSE2+ 1281 max_vector_size = 16; 1282 } else if (UseAVX == 1 || UseAVX == 2) { 1283 // 32 bytes vectors (in YMM) are only supported with AVX+ 1284 max_vector_size = 32; 1285 } else if (UseAVX > 2) { 1286 // 64 bytes vectors (in ZMM) are only supported with AVX 3 1287 max_vector_size = 64; 1288 } 1289 1290 #ifdef _LP64 1291 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit 1292 #else 1293 int min_vector_size = 0; 1294 #endif 1295 1296 if (!FLAG_IS_DEFAULT(MaxVectorSize)) { 1297 if (MaxVectorSize < min_vector_size) { 1298 warning("MaxVectorSize must be at least %i on this platform", min_vector_size); 1299 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size); 1300 } 1301 if (MaxVectorSize > max_vector_size) { 1302 warning("MaxVectorSize must be at most %i on this platform", max_vector_size); 1303 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1304 } 1305 if (!is_power_of_2(MaxVectorSize)) { 1306 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size); 1307 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1308 } 1309 } else { 1310 // If default, use highest supported configuration 1311 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size); 1312 } 1313 1314 #if defined(COMPILER2) 1315 if (FLAG_IS_DEFAULT(SuperWordMaxVectorSize)) { 1316 if (FLAG_IS_DEFAULT(UseAVX) && UseAVX > 2 && 1317 is_intel_skylake() && _stepping >= 5) { 1318 // Limit auto vectorization to 256 bit (32 byte) by default on Cascade Lake 1319 FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MIN2(MaxVectorSize, (intx)32)); 1320 } else { 1321 FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); 1322 } 1323 } else { 1324 if (SuperWordMaxVectorSize > MaxVectorSize) { 1325 warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %i", (int) MaxVectorSize); 1326 FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); 1327 } 1328 if (!is_power_of_2(SuperWordMaxVectorSize)) { 1329 warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %i", (int) MaxVectorSize); 1330 FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize); 1331 } 1332 } 1333 #endif 1334 1335 #if defined(COMPILER2) && defined(ASSERT) 1336 if (MaxVectorSize > 0) { 1337 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) { 1338 tty->print_cr("State of YMM registers after signal handle:"); 1339 int nreg = 2 LP64_ONLY(+2); 1340 const char* ymm_name[4] = {"0", "7", "8", "15"}; 1341 for (int i = 0; i < nreg; i++) { 1342 tty->print("YMM%s:", ymm_name[i]); 1343 for (int j = 7; j >=0; j--) { 1344 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]); 1345 } 1346 tty->cr(); 1347 } 1348 } 1349 } 1350 #endif // COMPILER2 && ASSERT 1351 1352 #ifdef _LP64 1353 if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) { 1354 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) { 1355 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true); 1356 } 1357 } else 1358 #endif 1359 if (UsePoly1305Intrinsics) { 1360 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU."); 1361 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false); 1362 } 1363 1364 #ifdef _LP64 1365 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1366 UseMultiplyToLenIntrinsic = true; 1367 } 1368 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1369 UseSquareToLenIntrinsic = true; 1370 } 1371 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1372 UseMulAddIntrinsic = true; 1373 } 1374 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1375 UseMontgomeryMultiplyIntrinsic = true; 1376 } 1377 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1378 UseMontgomerySquareIntrinsic = true; 1379 } 1380 #else 1381 if (UseMultiplyToLenIntrinsic) { 1382 if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { 1383 warning("multiplyToLen intrinsic is not available in 32-bit VM"); 1384 } 1385 FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false); 1386 } 1387 if (UseMontgomeryMultiplyIntrinsic) { 1388 if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { 1389 warning("montgomeryMultiply intrinsic is not available in 32-bit VM"); 1390 } 1391 FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false); 1392 } 1393 if (UseMontgomerySquareIntrinsic) { 1394 if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { 1395 warning("montgomerySquare intrinsic is not available in 32-bit VM"); 1396 } 1397 FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false); 1398 } 1399 if (UseSquareToLenIntrinsic) { 1400 if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { 1401 warning("squareToLen intrinsic is not available in 32-bit VM"); 1402 } 1403 FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false); 1404 } 1405 if (UseMulAddIntrinsic) { 1406 if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { 1407 warning("mulAdd intrinsic is not available in 32-bit VM"); 1408 } 1409 FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); 1410 } 1411 #endif // _LP64 1412 #endif // COMPILER2_OR_JVMCI 1413 1414 // On new cpus instructions which update whole XMM register should be used 1415 // to prevent partial register stall due to dependencies on high half. 1416 // 1417 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1418 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1419 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1420 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1421 1422 1423 if (is_zx()) { // ZX cpus specific settings 1424 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1425 UseStoreImmI16 = false; // don't use it on ZX cpus 1426 } 1427 if ((cpu_family() == 6) || (cpu_family() == 7)) { 1428 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1429 // Use it on all ZX cpus 1430 UseAddressNop = true; 1431 } 1432 } 1433 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1434 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus 1435 } 1436 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1437 if (supports_sse3()) { 1438 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus 1439 } else { 1440 UseXmmRegToRegMoveAll = false; 1441 } 1442 } 1443 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus 1444 #ifdef COMPILER2 1445 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1446 // For new ZX cpus do the next optimization: 1447 // don't align the beginning of a loop if there are enough instructions 1448 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1449 // in current fetch line (OptoLoopAlignment) or the padding 1450 // is big (> MaxLoopPad). 1451 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of 1452 // generated NOP instructions. 11 is the largest size of one 1453 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1454 MaxLoopPad = 11; 1455 } 1456 #endif // COMPILER2 1457 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1458 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus 1459 } 1460 if (supports_sse4_2()) { // new ZX cpus 1461 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1462 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus 1463 } 1464 } 1465 if (supports_sse4_2()) { 1466 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1467 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1468 } 1469 } else { 1470 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1471 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1472 } 1473 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1474 } 1475 } 1476 1477 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1478 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1479 } 1480 } 1481 1482 if (is_amd_family()) { // AMD cpus specific settings 1483 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { 1484 // Use it on new AMD cpus starting from Opteron. 1485 UseAddressNop = true; 1486 } 1487 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { 1488 // Use it on new AMD cpus starting from Opteron. 1489 UseNewLongLShift = true; 1490 } 1491 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1492 if (supports_sse4a()) { 1493 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron 1494 } else { 1495 UseXmmLoadAndClearUpper = false; 1496 } 1497 } 1498 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1499 if (supports_sse4a()) { 1500 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h' 1501 } else { 1502 UseXmmRegToRegMoveAll = false; 1503 } 1504 } 1505 if (FLAG_IS_DEFAULT(UseXmmI2F)) { 1506 if (supports_sse4a()) { 1507 UseXmmI2F = true; 1508 } else { 1509 UseXmmI2F = false; 1510 } 1511 } 1512 if (FLAG_IS_DEFAULT(UseXmmI2D)) { 1513 if (supports_sse4a()) { 1514 UseXmmI2D = true; 1515 } else { 1516 UseXmmI2D = false; 1517 } 1518 } 1519 if (supports_sse4_2()) { 1520 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1521 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1522 } 1523 } else { 1524 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1525 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1526 } 1527 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1528 } 1529 1530 // some defaults for AMD family 15h 1531 if (cpu_family() == 0x15) { 1532 // On family 15h processors default is no sw prefetch 1533 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1534 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1535 } 1536 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW 1537 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1538 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1539 } 1540 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy 1541 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1542 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1543 } 1544 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1545 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1546 } 1547 } 1548 1549 #ifdef COMPILER2 1550 if (cpu_family() < 0x17 && MaxVectorSize > 16) { 1551 // Limit vectors size to 16 bytes on AMD cpus < 17h. 1552 FLAG_SET_DEFAULT(MaxVectorSize, 16); 1553 } 1554 #endif // COMPILER2 1555 1556 // Some defaults for AMD family >= 17h && Hygon family 18h 1557 if (cpu_family() >= 0x17) { 1558 // On family >=17h processors use XMM and UnalignedLoadStores 1559 // for Array Copy 1560 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1561 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true); 1562 } 1563 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1564 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true); 1565 } 1566 #ifdef COMPILER2 1567 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1568 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1569 } 1570 #endif 1571 } 1572 } 1573 1574 if (is_intel()) { // Intel cpus specific settings 1575 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1576 UseStoreImmI16 = false; // don't use it on Intel cpus 1577 } 1578 if (cpu_family() == 6 || cpu_family() == 15) { 1579 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1580 // Use it on all Intel cpus starting from PentiumPro 1581 UseAddressNop = true; 1582 } 1583 } 1584 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1585 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus 1586 } 1587 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1588 if (supports_sse3()) { 1589 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus 1590 } else { 1591 UseXmmRegToRegMoveAll = false; 1592 } 1593 } 1594 if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus 1595 #ifdef COMPILER2 1596 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1597 // For new Intel cpus do the next optimization: 1598 // don't align the beginning of a loop if there are enough instructions 1599 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1600 // in current fetch line (OptoLoopAlignment) or the padding 1601 // is big (> MaxLoopPad). 1602 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of 1603 // generated NOP instructions. 11 is the largest size of one 1604 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1605 MaxLoopPad = 11; 1606 } 1607 #endif // COMPILER2 1608 1609 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1610 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus 1611 } 1612 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus 1613 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1614 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1615 } 1616 } 1617 if (supports_sse4_2()) { 1618 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1619 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1620 } 1621 } else { 1622 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1623 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1624 } 1625 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1626 } 1627 } 1628 if (is_atom_family() || is_knights_family()) { 1629 #ifdef COMPILER2 1630 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1631 OptoScheduling = true; 1632 } 1633 #endif 1634 if (supports_sse4_2()) { // Silvermont 1635 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1636 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1637 } 1638 } 1639 if (FLAG_IS_DEFAULT(UseIncDec)) { 1640 FLAG_SET_DEFAULT(UseIncDec, false); 1641 } 1642 } 1643 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1644 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1645 } 1646 #ifdef COMPILER2 1647 if (UseAVX > 2) { 1648 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1649 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1650 ArrayOperationPartialInlineSize != 0 && 1651 ArrayOperationPartialInlineSize != 16 && 1652 ArrayOperationPartialInlineSize != 32 && 1653 ArrayOperationPartialInlineSize != 64)) { 1654 int inline_size = 0; 1655 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1656 inline_size = 64; 1657 } else if (MaxVectorSize >= 32) { 1658 inline_size = 32; 1659 } else if (MaxVectorSize >= 16) { 1660 inline_size = 16; 1661 } 1662 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1663 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); 1664 } 1665 ArrayOperationPartialInlineSize = inline_size; 1666 } 1667 1668 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1669 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; 1670 if (ArrayOperationPartialInlineSize) { 1671 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize); 1672 } else { 1673 warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize); 1674 } 1675 } 1676 } 1677 #endif 1678 } 1679 1680 #ifdef COMPILER2 1681 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1682 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { 1683 OptimizeFill = false; 1684 } 1685 } 1686 #endif 1687 1688 #ifdef _LP64 1689 if (UseSSE42Intrinsics) { 1690 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1691 UseVectorizedMismatchIntrinsic = true; 1692 } 1693 } else if (UseVectorizedMismatchIntrinsic) { 1694 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1695 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1696 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1697 } 1698 #else 1699 if (UseVectorizedMismatchIntrinsic) { 1700 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1701 warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); 1702 } 1703 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1704 } 1705 #endif // _LP64 1706 1707 // Use count leading zeros count instruction if available. 1708 if (supports_lzcnt()) { 1709 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1710 UseCountLeadingZerosInstruction = true; 1711 } 1712 } else if (UseCountLeadingZerosInstruction) { 1713 warning("lzcnt instruction is not available on this CPU"); 1714 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1715 } 1716 1717 // Use count trailing zeros instruction if available 1718 if (supports_bmi1()) { 1719 // tzcnt does not require VEX prefix 1720 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1721 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1722 // Don't use tzcnt if BMI1 is switched off on command line. 1723 UseCountTrailingZerosInstruction = false; 1724 } else { 1725 UseCountTrailingZerosInstruction = true; 1726 } 1727 } 1728 } else if (UseCountTrailingZerosInstruction) { 1729 warning("tzcnt instruction is not available on this CPU"); 1730 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false); 1731 } 1732 1733 // BMI instructions (except tzcnt) use an encoding with VEX prefix. 1734 // VEX prefix is generated only when AVX > 0. 1735 if (supports_bmi1() && supports_avx()) { 1736 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1737 UseBMI1Instructions = true; 1738 } 1739 } else if (UseBMI1Instructions) { 1740 warning("BMI1 instructions are not available on this CPU (AVX is also required)"); 1741 FLAG_SET_DEFAULT(UseBMI1Instructions, false); 1742 } 1743 1744 if (supports_bmi2() && supports_avx()) { 1745 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) { 1746 UseBMI2Instructions = true; 1747 } 1748 } else if (UseBMI2Instructions) { 1749 warning("BMI2 instructions are not available on this CPU (AVX is also required)"); 1750 FLAG_SET_DEFAULT(UseBMI2Instructions, false); 1751 } 1752 1753 // Use population count instruction if available. 1754 if (supports_popcnt()) { 1755 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { 1756 UsePopCountInstruction = true; 1757 } 1758 } else if (UsePopCountInstruction) { 1759 warning("POPCNT instruction is not available on this CPU"); 1760 FLAG_SET_DEFAULT(UsePopCountInstruction, false); 1761 } 1762 1763 // Use fast-string operations if available. 1764 if (supports_erms()) { 1765 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1766 UseFastStosb = true; 1767 } 1768 } else if (UseFastStosb) { 1769 warning("fast-string operations are not available on this CPU"); 1770 FLAG_SET_DEFAULT(UseFastStosb, false); 1771 } 1772 1773 // For AMD Processors use XMM/YMM MOVDQU instructions 1774 // for Object Initialization as default 1775 if (is_amd() && cpu_family() >= 0x19) { 1776 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1777 UseFastStosb = false; 1778 } 1779 } 1780 1781 #ifdef COMPILER2 1782 if (is_intel() && MaxVectorSize > 16) { 1783 if (FLAG_IS_DEFAULT(UseFastStosb)) { 1784 UseFastStosb = false; 1785 } 1786 } 1787 #endif 1788 1789 // Use XMM/YMM MOVDQU instruction for Object Initialization 1790 if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) { 1791 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) { 1792 UseXMMForObjInit = true; 1793 } 1794 } else if (UseXMMForObjInit) { 1795 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off."); 1796 FLAG_SET_DEFAULT(UseXMMForObjInit, false); 1797 } 1798 1799 #ifdef COMPILER2 1800 if (FLAG_IS_DEFAULT(AlignVector)) { 1801 // Modern processors allow misaligned memory operations for vectors. 1802 AlignVector = !UseUnalignedLoadStores; 1803 } 1804 #endif // COMPILER2 1805 1806 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) { 1807 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) { 1808 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0); 1809 } else if (!supports_sse() && supports_3dnow_prefetch()) { 1810 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1811 } 1812 } 1813 1814 // Allocation prefetch settings 1815 intx cache_line_size = prefetch_data_size(); 1816 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) && 1817 (cache_line_size > AllocatePrefetchStepSize)) { 1818 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size); 1819 } 1820 1821 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) { 1822 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0"); 1823 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { 1824 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag."); 1825 } 1826 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0); 1827 } 1828 1829 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { 1830 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2); 1831 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch)); 1832 } 1833 1834 if (is_intel() && cpu_family() == 6 && supports_sse3()) { 1835 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) && 1836 supports_sse4_2() && supports_ht()) { // Nehalem based cpus 1837 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4); 1838 } 1839 #ifdef COMPILER2 1840 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) { 1841 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1842 } 1843 #endif 1844 } 1845 1846 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) { 1847 #ifdef COMPILER2 1848 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { 1849 FLAG_SET_DEFAULT(UseFPUForSpilling, true); 1850 } 1851 #endif 1852 } 1853 1854 #ifdef _LP64 1855 // Prefetch settings 1856 1857 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from 1858 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap. 1859 // Tested intervals from 128 to 2048 in increments of 64 == one cache line. 1860 // 256 bytes (4 dcache lines) was the nearest runner-up to 576. 1861 1862 // gc copy/scan is disabled if prefetchw isn't supported, because 1863 // Prefetch::write emits an inlined prefetchw on Linux. 1864 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t. 1865 // The used prefetcht0 instruction works for both amd64 and em64t. 1866 1867 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) { 1868 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576); 1869 } 1870 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) { 1871 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576); 1872 } 1873 #endif 1874 1875 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) && 1876 (cache_line_size > ContendedPaddingWidth)) 1877 ContendedPaddingWidth = cache_line_size; 1878 1879 // This machine allows unaligned memory accesses 1880 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { 1881 FLAG_SET_DEFAULT(UseUnalignedAccesses, true); 1882 } 1883 1884 #ifndef PRODUCT 1885 if (log_is_enabled(Info, os, cpu)) { 1886 LogStream ls(Log(os, cpu)::info()); 1887 outputStream* log = &ls; 1888 log->print_cr("Logical CPUs per core: %u", 1889 logical_processors_per_package()); 1890 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size()); 1891 log->print("UseSSE=%d", UseSSE); 1892 if (UseAVX > 0) { 1893 log->print(" UseAVX=%d", UseAVX); 1894 } 1895 if (UseAES) { 1896 log->print(" UseAES=1"); 1897 } 1898 #ifdef COMPILER2 1899 if (MaxVectorSize > 0) { 1900 log->print(" MaxVectorSize=%d", (int) MaxVectorSize); 1901 } 1902 #endif 1903 log->cr(); 1904 log->print("Allocation"); 1905 if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) { 1906 log->print_cr(": no prefetching"); 1907 } else { 1908 log->print(" prefetching: "); 1909 if (UseSSE == 0 && supports_3dnow_prefetch()) { 1910 log->print("PREFETCHW"); 1911 } else if (UseSSE >= 1) { 1912 if (AllocatePrefetchInstr == 0) { 1913 log->print("PREFETCHNTA"); 1914 } else if (AllocatePrefetchInstr == 1) { 1915 log->print("PREFETCHT0"); 1916 } else if (AllocatePrefetchInstr == 2) { 1917 log->print("PREFETCHT2"); 1918 } else if (AllocatePrefetchInstr == 3) { 1919 log->print("PREFETCHW"); 1920 } 1921 } 1922 if (AllocatePrefetchLines > 1) { 1923 log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize); 1924 } else { 1925 log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize); 1926 } 1927 } 1928 1929 if (PrefetchCopyIntervalInBytes > 0) { 1930 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes); 1931 } 1932 if (PrefetchScanIntervalInBytes > 0) { 1933 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes); 1934 } 1935 if (ContendedPaddingWidth > 0) { 1936 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth); 1937 } 1938 } 1939 #endif // !PRODUCT 1940 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) { 1941 FLAG_SET_DEFAULT(UseSignumIntrinsic, true); 1942 } 1943 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) { 1944 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true); 1945 } 1946 } 1947 1948 void VM_Version::print_platform_virtualization_info(outputStream* st) { 1949 VirtualizationType vrt = VM_Version::get_detected_virtualization(); 1950 if (vrt == XenHVM) { 1951 st->print_cr("Xen hardware-assisted virtualization detected"); 1952 } else if (vrt == KVM) { 1953 st->print_cr("KVM virtualization detected"); 1954 } else if (vrt == VMWare) { 1955 st->print_cr("VMWare virtualization detected"); 1956 VirtualizationSupport::print_virtualization_info(st); 1957 } else if (vrt == HyperV) { 1958 st->print_cr("Hyper-V virtualization detected"); 1959 } else if (vrt == HyperVRole) { 1960 st->print_cr("Hyper-V role detected"); 1961 } 1962 } 1963 1964 bool VM_Version::compute_has_intel_jcc_erratum() { 1965 if (!is_intel_family_core()) { 1966 // Only Intel CPUs are affected. 1967 return false; 1968 } 1969 // The following table of affected CPUs is based on the following document released by Intel: 1970 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf 1971 switch (_model) { 1972 case 0x8E: 1973 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1974 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 1975 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e 1976 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y 1977 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e 1978 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1979 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y 1980 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42 1981 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U 1982 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC; 1983 case 0x4E: 1984 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U 1985 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e 1986 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y 1987 return _stepping == 0x3; 1988 case 0x55: 1989 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville 1990 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server 1991 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W 1992 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X 1993 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3 1994 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server) 1995 return _stepping == 0x4 || _stepping == 0x7; 1996 case 0x5E: 1997 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H 1998 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S 1999 return _stepping == 0x3; 2000 case 0x9E: 2001 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G 2002 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H 2003 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S 2004 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X 2005 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3 2006 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H 2007 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S 2008 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP 2009 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2) 2010 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2) 2011 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2) 2012 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2) 2013 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2) 2014 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2) 2015 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD; 2016 case 0xA5: 2017 // Not in Intel documentation. 2018 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H 2019 return true; 2020 case 0xA6: 2021 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62 2022 return _stepping == 0x0; 2023 case 0xAE: 2024 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2) 2025 return _stepping == 0xA; 2026 default: 2027 // If we are running on another intel machine not recognized in the table, we are okay. 2028 return false; 2029 } 2030 } 2031 2032 // On Xen, the cpuid instruction returns 2033 // eax / registers[0]: Version of Xen 2034 // ebx / registers[1]: chars 'XenV' 2035 // ecx / registers[2]: chars 'MMXe' 2036 // edx / registers[3]: chars 'nVMM' 2037 // 2038 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns 2039 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr' 2040 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof' 2041 // edx / registers[3]: chars 'M' / 'ware' / 't Hv' 2042 // 2043 // more information : 2044 // https://kb.vmware.com/s/article/1009458 2045 // 2046 void VM_Version::check_virtualizations() { 2047 uint32_t registers[4] = {0}; 2048 char signature[13] = {0}; 2049 2050 // Xen cpuid leaves can be found 0x100 aligned boundary starting 2051 // from 0x40000000 until 0x40010000. 2052 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html 2053 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) { 2054 detect_virt_stub(leaf, registers); 2055 memcpy(signature, ®isters[1], 12); 2056 2057 if (strncmp("VMwareVMware", signature, 12) == 0) { 2058 Abstract_VM_Version::_detected_virtualization = VMWare; 2059 // check for extended metrics from guestlib 2060 VirtualizationSupport::initialize(); 2061 } else if (strncmp("Microsoft Hv", signature, 12) == 0) { 2062 Abstract_VM_Version::_detected_virtualization = HyperV; 2063 #ifdef _WINDOWS 2064 // CPUID leaf 0x40000007 is available to the root partition only. 2065 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details. 2066 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf 2067 detect_virt_stub(0x40000007, registers); 2068 if ((registers[0] != 0x0) || 2069 (registers[1] != 0x0) || 2070 (registers[2] != 0x0) || 2071 (registers[3] != 0x0)) { 2072 Abstract_VM_Version::_detected_virtualization = HyperVRole; 2073 } 2074 #endif 2075 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) { 2076 Abstract_VM_Version::_detected_virtualization = KVM; 2077 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) { 2078 Abstract_VM_Version::_detected_virtualization = XenHVM; 2079 } 2080 } 2081 } 2082 2083 // avx3_threshold() sets the threshold at which 64-byte instructions are used 2084 // for implementing the array copy and clear operations. 2085 // The Intel platforms that supports the serialize instruction 2086 // has improved implementation of 64-byte load/stores and so the default 2087 // threshold is set to 0 for these platforms. 2088 int VM_Version::avx3_threshold() { 2089 return (is_intel_family_core() && 2090 supports_serialize() && 2091 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold; 2092 } 2093 2094 static bool _vm_version_initialized = false; 2095 2096 void VM_Version::initialize() { 2097 ResourceMark rm; 2098 // Making this stub must be FIRST use of assembler 2099 stub_blob = BufferBlob::create("VM_Version stub", stub_size); 2100 if (stub_blob == NULL) { 2101 vm_exit_during_initialization("Unable to allocate stub for VM_Version"); 2102 } 2103 CodeBuffer c(stub_blob); 2104 VM_Version_StubGenerator g(&c); 2105 2106 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, 2107 g.generate_get_cpu_info()); 2108 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t, 2109 g.generate_detect_virt()); 2110 2111 get_processor_features(); 2112 2113 LP64_ONLY(Assembler::precompute_instructions();) 2114 2115 if (VM_Version::supports_hv()) { // Supports hypervisor 2116 check_virtualizations(); 2117 } 2118 _vm_version_initialized = true; 2119 } 2120 2121 typedef enum { 2122 CPU_FAMILY_8086_8088 = 0, 2123 CPU_FAMILY_INTEL_286 = 2, 2124 CPU_FAMILY_INTEL_386 = 3, 2125 CPU_FAMILY_INTEL_486 = 4, 2126 CPU_FAMILY_PENTIUM = 5, 2127 CPU_FAMILY_PENTIUMPRO = 6, // Same family several models 2128 CPU_FAMILY_PENTIUM_4 = 0xF 2129 } FamilyFlag; 2130 2131 typedef enum { 2132 RDTSCP_FLAG = 0x08000000, // bit 27 2133 INTEL64_FLAG = 0x20000000 // bit 29 2134 } _featureExtendedEdxFlag; 2135 2136 typedef enum { 2137 FPU_FLAG = 0x00000001, 2138 VME_FLAG = 0x00000002, 2139 DE_FLAG = 0x00000004, 2140 PSE_FLAG = 0x00000008, 2141 TSC_FLAG = 0x00000010, 2142 MSR_FLAG = 0x00000020, 2143 PAE_FLAG = 0x00000040, 2144 MCE_FLAG = 0x00000080, 2145 CX8_FLAG = 0x00000100, 2146 APIC_FLAG = 0x00000200, 2147 SEP_FLAG = 0x00000800, 2148 MTRR_FLAG = 0x00001000, 2149 PGE_FLAG = 0x00002000, 2150 MCA_FLAG = 0x00004000, 2151 CMOV_FLAG = 0x00008000, 2152 PAT_FLAG = 0x00010000, 2153 PSE36_FLAG = 0x00020000, 2154 PSNUM_FLAG = 0x00040000, 2155 CLFLUSH_FLAG = 0x00080000, 2156 DTS_FLAG = 0x00200000, 2157 ACPI_FLAG = 0x00400000, 2158 MMX_FLAG = 0x00800000, 2159 FXSR_FLAG = 0x01000000, 2160 SSE_FLAG = 0x02000000, 2161 SSE2_FLAG = 0x04000000, 2162 SS_FLAG = 0x08000000, 2163 HTT_FLAG = 0x10000000, 2164 TM_FLAG = 0x20000000 2165 } FeatureEdxFlag; 2166 2167 static BufferBlob* cpuid_brand_string_stub_blob; 2168 static const int cpuid_brand_string_stub_size = 550; 2169 2170 extern "C" { 2171 typedef void (*getCPUIDBrandString_stub_t)(void*); 2172 } 2173 2174 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL; 2175 2176 // VM_Version statics 2177 enum { 2178 ExtendedFamilyIdLength_INTEL = 16, 2179 ExtendedFamilyIdLength_AMD = 24 2180 }; 2181 2182 const size_t VENDOR_LENGTH = 13; 2183 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); 2184 static char* _cpu_brand_string = NULL; 2185 static int64_t _max_qualified_cpu_frequency = 0; 2186 2187 static int _no_of_threads = 0; 2188 static int _no_of_cores = 0; 2189 2190 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = { 2191 "8086/8088", 2192 "", 2193 "286", 2194 "386", 2195 "486", 2196 "Pentium", 2197 "Pentium Pro", //or Pentium-M/Woodcrest depending on model 2198 "", 2199 "", 2200 "", 2201 "", 2202 "", 2203 "", 2204 "", 2205 "", 2206 "Pentium 4" 2207 }; 2208 2209 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = { 2210 "", 2211 "", 2212 "", 2213 "", 2214 "5x86", 2215 "K5/K6", 2216 "Athlon/AthlonXP", 2217 "", 2218 "", 2219 "", 2220 "", 2221 "", 2222 "", 2223 "", 2224 "", 2225 "Opteron/Athlon64", 2226 "Opteron QC/Phenom", // Barcelona et.al. 2227 "", 2228 "", 2229 "", 2230 "", 2231 "", 2232 "", 2233 "Zen" 2234 }; 2235 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual, 2236 // September 2013, Vol 3C Table 35-1 2237 const char* const _model_id_pentium_pro[] = { 2238 "", 2239 "Pentium Pro", 2240 "", 2241 "Pentium II model 3", 2242 "", 2243 "Pentium II model 5/Xeon/Celeron", 2244 "Celeron", 2245 "Pentium III/Pentium III Xeon", 2246 "Pentium III/Pentium III Xeon", 2247 "Pentium M model 9", // Yonah 2248 "Pentium III, model A", 2249 "Pentium III, model B", 2250 "", 2251 "Pentium M model D", // Dothan 2252 "", 2253 "Core 2", // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown 2254 "", 2255 "", 2256 "", 2257 "", 2258 "", 2259 "", 2260 "Celeron", // 0x16 Celeron 65nm 2261 "Core 2", // 0x17 Penryn / Harpertown 2262 "", 2263 "", 2264 "Core i7", // 0x1A CPU_MODEL_NEHALEM_EP 2265 "Atom", // 0x1B Z5xx series Silverthorn 2266 "", 2267 "Core 2", // 0x1D Dunnington (6-core) 2268 "Nehalem", // 0x1E CPU_MODEL_NEHALEM 2269 "", 2270 "", 2271 "", 2272 "", 2273 "", 2274 "", 2275 "Westmere", // 0x25 CPU_MODEL_WESTMERE 2276 "", 2277 "", 2278 "", // 0x28 2279 "", 2280 "Sandy Bridge", // 0x2a "2nd Generation Intel Core i7, i5, i3" 2281 "", 2282 "Westmere-EP", // 0x2c CPU_MODEL_WESTMERE_EP 2283 "Sandy Bridge-EP", // 0x2d CPU_MODEL_SANDYBRIDGE_EP 2284 "Nehalem-EX", // 0x2e CPU_MODEL_NEHALEM_EX 2285 "Westmere-EX", // 0x2f CPU_MODEL_WESTMERE_EX 2286 "", 2287 "", 2288 "", 2289 "", 2290 "", 2291 "", 2292 "", 2293 "", 2294 "", 2295 "", 2296 "Ivy Bridge", // 0x3a 2297 "", 2298 "Haswell", // 0x3c "4th Generation Intel Core Processor" 2299 "", // 0x3d "Next Generation Intel Core Processor" 2300 "Ivy Bridge-EP", // 0x3e "Next Generation Intel Xeon Processor E7 Family" 2301 "", // 0x3f "Future Generation Intel Xeon Processor" 2302 "", 2303 "", 2304 "", 2305 "", 2306 "", 2307 "Haswell", // 0x45 "4th Generation Intel Core Processor" 2308 "Haswell", // 0x46 "4th Generation Intel Core Processor" 2309 NULL 2310 }; 2311 2312 /* Brand ID is for back compatibility 2313 * Newer CPUs uses the extended brand string */ 2314 const char* const _brand_id[] = { 2315 "", 2316 "Celeron processor", 2317 "Pentium III processor", 2318 "Intel Pentium III Xeon processor", 2319 "", 2320 "", 2321 "", 2322 "", 2323 "Intel Pentium 4 processor", 2324 NULL 2325 }; 2326 2327 2328 const char* const _feature_edx_id[] = { 2329 "On-Chip FPU", 2330 "Virtual Mode Extensions", 2331 "Debugging Extensions", 2332 "Page Size Extensions", 2333 "Time Stamp Counter", 2334 "Model Specific Registers", 2335 "Physical Address Extension", 2336 "Machine Check Exceptions", 2337 "CMPXCHG8B Instruction", 2338 "On-Chip APIC", 2339 "", 2340 "Fast System Call", 2341 "Memory Type Range Registers", 2342 "Page Global Enable", 2343 "Machine Check Architecture", 2344 "Conditional Mov Instruction", 2345 "Page Attribute Table", 2346 "36-bit Page Size Extension", 2347 "Processor Serial Number", 2348 "CLFLUSH Instruction", 2349 "", 2350 "Debug Trace Store feature", 2351 "ACPI registers in MSR space", 2352 "Intel Architecture MMX Technology", 2353 "Fast Float Point Save and Restore", 2354 "Streaming SIMD extensions", 2355 "Streaming SIMD extensions 2", 2356 "Self-Snoop", 2357 "Hyper Threading", 2358 "Thermal Monitor", 2359 "", 2360 "Pending Break Enable" 2361 }; 2362 2363 const char* const _feature_extended_edx_id[] = { 2364 "", 2365 "", 2366 "", 2367 "", 2368 "", 2369 "", 2370 "", 2371 "", 2372 "", 2373 "", 2374 "", 2375 "SYSCALL/SYSRET", 2376 "", 2377 "", 2378 "", 2379 "", 2380 "", 2381 "", 2382 "", 2383 "", 2384 "Execute Disable Bit", 2385 "", 2386 "", 2387 "", 2388 "", 2389 "", 2390 "", 2391 "RDTSCP", 2392 "", 2393 "Intel 64 Architecture", 2394 "", 2395 "" 2396 }; 2397 2398 const char* const _feature_ecx_id[] = { 2399 "Streaming SIMD Extensions 3", 2400 "PCLMULQDQ", 2401 "64-bit DS Area", 2402 "MONITOR/MWAIT instructions", 2403 "CPL Qualified Debug Store", 2404 "Virtual Machine Extensions", 2405 "Safer Mode Extensions", 2406 "Enhanced Intel SpeedStep technology", 2407 "Thermal Monitor 2", 2408 "Supplemental Streaming SIMD Extensions 3", 2409 "L1 Context ID", 2410 "", 2411 "Fused Multiply-Add", 2412 "CMPXCHG16B", 2413 "xTPR Update Control", 2414 "Perfmon and Debug Capability", 2415 "", 2416 "Process-context identifiers", 2417 "Direct Cache Access", 2418 "Streaming SIMD extensions 4.1", 2419 "Streaming SIMD extensions 4.2", 2420 "x2APIC", 2421 "MOVBE", 2422 "Popcount instruction", 2423 "TSC-Deadline", 2424 "AESNI", 2425 "XSAVE", 2426 "OSXSAVE", 2427 "AVX", 2428 "F16C", 2429 "RDRAND", 2430 "" 2431 }; 2432 2433 const char* const _feature_extended_ecx_id[] = { 2434 "LAHF/SAHF instruction support", 2435 "Core multi-processor legacy mode", 2436 "", 2437 "", 2438 "", 2439 "Advanced Bit Manipulations: LZCNT", 2440 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2441 "Misaligned SSE mode", 2442 "", 2443 "", 2444 "", 2445 "", 2446 "", 2447 "", 2448 "", 2449 "", 2450 "", 2451 "", 2452 "", 2453 "", 2454 "", 2455 "", 2456 "", 2457 "", 2458 "", 2459 "", 2460 "", 2461 "", 2462 "", 2463 "", 2464 "", 2465 "" 2466 }; 2467 2468 void VM_Version::initialize_tsc(void) { 2469 ResourceMark rm; 2470 2471 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2472 if (cpuid_brand_string_stub_blob == NULL) { 2473 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2474 } 2475 CodeBuffer c(cpuid_brand_string_stub_blob); 2476 VM_Version_StubGenerator g(&c); 2477 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2478 g.generate_getCPUIDBrandString()); 2479 } 2480 2481 const char* VM_Version::cpu_model_description(void) { 2482 uint32_t cpu_family = extended_cpu_family(); 2483 uint32_t cpu_model = extended_cpu_model(); 2484 const char* model = NULL; 2485 2486 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2487 for (uint32_t i = 0; i <= cpu_model; i++) { 2488 model = _model_id_pentium_pro[i]; 2489 if (model == NULL) { 2490 break; 2491 } 2492 } 2493 } 2494 return model; 2495 } 2496 2497 const char* VM_Version::cpu_brand_string(void) { 2498 if (_cpu_brand_string == NULL) { 2499 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal); 2500 if (NULL == _cpu_brand_string) { 2501 return NULL; 2502 } 2503 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH); 2504 if (ret_val != OS_OK) { 2505 FREE_C_HEAP_ARRAY(char, _cpu_brand_string); 2506 _cpu_brand_string = NULL; 2507 } 2508 } 2509 return _cpu_brand_string; 2510 } 2511 2512 const char* VM_Version::cpu_brand(void) { 2513 const char* brand = NULL; 2514 2515 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) { 2516 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF; 2517 brand = _brand_id[0]; 2518 for (int i = 0; brand != NULL && i <= brand_num; i += 1) { 2519 brand = _brand_id[i]; 2520 } 2521 } 2522 return brand; 2523 } 2524 2525 bool VM_Version::cpu_is_em64t(void) { 2526 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG); 2527 } 2528 2529 bool VM_Version::is_netburst(void) { 2530 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4)); 2531 } 2532 2533 bool VM_Version::supports_tscinv_ext(void) { 2534 if (!supports_tscinv_bit()) { 2535 return false; 2536 } 2537 2538 if (is_intel()) { 2539 return true; 2540 } 2541 2542 if (is_amd()) { 2543 return !is_amd_Barcelona(); 2544 } 2545 2546 if (is_hygon()) { 2547 return true; 2548 } 2549 2550 return false; 2551 } 2552 2553 void VM_Version::resolve_cpu_information_details(void) { 2554 2555 // in future we want to base this information on proper cpu 2556 // and cache topology enumeration such as: 2557 // Intel 64 Architecture Processor Topology Enumeration 2558 // which supports system cpu and cache topology enumeration 2559 // either using 2xAPICIDs or initial APICIDs 2560 2561 // currently only rough cpu information estimates 2562 // which will not necessarily reflect the exact configuration of the system 2563 2564 // this is the number of logical hardware threads 2565 // visible to the operating system 2566 _no_of_threads = os::processor_count(); 2567 2568 // find out number of threads per cpu package 2569 int threads_per_package = threads_per_core() * cores_per_cpu(); 2570 2571 // use amount of threads visible to the process in order to guess number of sockets 2572 _no_of_sockets = _no_of_threads / threads_per_package; 2573 2574 // process might only see a subset of the total number of threads 2575 // from a single processor package. Virtualization/resource management for example. 2576 // If so then just write a hard 1 as num of pkgs. 2577 if (0 == _no_of_sockets) { 2578 _no_of_sockets = 1; 2579 } 2580 2581 // estimate the number of cores 2582 _no_of_cores = cores_per_cpu() * _no_of_sockets; 2583 } 2584 2585 2586 const char* VM_Version::cpu_family_description(void) { 2587 int cpu_family_id = extended_cpu_family(); 2588 if (is_amd()) { 2589 if (cpu_family_id < ExtendedFamilyIdLength_AMD) { 2590 return _family_id_amd[cpu_family_id]; 2591 } 2592 } 2593 if (is_intel()) { 2594 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) { 2595 return cpu_model_description(); 2596 } 2597 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) { 2598 return _family_id_intel[cpu_family_id]; 2599 } 2600 } 2601 if (is_hygon()) { 2602 return "Dhyana"; 2603 } 2604 return "Unknown x86"; 2605 } 2606 2607 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) { 2608 assert(buf != NULL, "buffer is NULL!"); 2609 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!"); 2610 2611 const char* cpu_type = NULL; 2612 const char* x64 = NULL; 2613 2614 if (is_intel()) { 2615 cpu_type = "Intel"; 2616 x64 = cpu_is_em64t() ? " Intel64" : ""; 2617 } else if (is_amd()) { 2618 cpu_type = "AMD"; 2619 x64 = cpu_is_em64t() ? " AMD64" : ""; 2620 } else if (is_hygon()) { 2621 cpu_type = "Hygon"; 2622 x64 = cpu_is_em64t() ? " AMD64" : ""; 2623 } else { 2624 cpu_type = "Unknown x86"; 2625 x64 = cpu_is_em64t() ? " x86_64" : ""; 2626 } 2627 2628 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s", 2629 cpu_type, 2630 cpu_family_description(), 2631 supports_ht() ? " (HT)" : "", 2632 supports_sse3() ? " SSE3" : "", 2633 supports_ssse3() ? " SSSE3" : "", 2634 supports_sse4_1() ? " SSE4.1" : "", 2635 supports_sse4_2() ? " SSE4.2" : "", 2636 supports_sse4a() ? " SSE4A" : "", 2637 is_netburst() ? " Netburst" : "", 2638 is_intel_family_core() ? " Core" : "", 2639 x64); 2640 2641 return OS_OK; 2642 } 2643 2644 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2645 assert(buf != NULL, "buffer is NULL!"); 2646 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2647 assert(getCPUIDBrandString_stub != NULL, "not initialized"); 2648 2649 // invoke newly generated asm code to fetch CPU Brand String 2650 getCPUIDBrandString_stub(&_cpuid_info); 2651 2652 // fetch results into buffer 2653 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2654 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2655 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2656 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2657 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2658 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2659 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2660 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2661 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2662 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2663 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2664 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2665 2666 return OS_OK; 2667 } 2668 2669 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2670 guarantee(buf != NULL, "buffer is NULL!"); 2671 guarantee(buf_len > 0, "buffer len not enough!"); 2672 2673 unsigned int flag = 0; 2674 unsigned int fi = 0; 2675 size_t written = 0; 2676 const char* prefix = ""; 2677 2678 #define WRITE_TO_BUF(string) \ 2679 { \ 2680 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2681 if (res < 0) { \ 2682 return buf_len - 1; \ 2683 } \ 2684 written += res; \ 2685 if (prefix[0] == '\0') { \ 2686 prefix = ", "; \ 2687 } \ 2688 } 2689 2690 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2691 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2692 continue; /* no hyperthreading */ 2693 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2694 continue; /* no fast system call */ 2695 } 2696 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2697 WRITE_TO_BUF(_feature_edx_id[fi]); 2698 } 2699 } 2700 2701 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2702 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2703 WRITE_TO_BUF(_feature_ecx_id[fi]); 2704 } 2705 } 2706 2707 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2708 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2709 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2710 } 2711 } 2712 2713 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2714 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2715 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2716 } 2717 } 2718 2719 if (supports_tscinv_bit()) { 2720 WRITE_TO_BUF("Invariant TSC"); 2721 } 2722 2723 return written; 2724 } 2725 2726 /** 2727 * Write a detailed description of the cpu to a given buffer, including 2728 * feature set. 2729 */ 2730 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) { 2731 assert(buf != NULL, "buffer is NULL!"); 2732 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!"); 2733 2734 static const char* unknown = "<unknown>"; 2735 char vendor_id[VENDOR_LENGTH]; 2736 const char* family = NULL; 2737 const char* model = NULL; 2738 const char* brand = NULL; 2739 int outputLen = 0; 2740 2741 family = cpu_family_description(); 2742 if (family == NULL) { 2743 family = unknown; 2744 } 2745 2746 model = cpu_model_description(); 2747 if (model == NULL) { 2748 model = unknown; 2749 } 2750 2751 brand = cpu_brand_string(); 2752 2753 if (brand == NULL) { 2754 brand = cpu_brand(); 2755 if (brand == NULL) { 2756 brand = unknown; 2757 } 2758 } 2759 2760 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; 2761 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; 2762 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; 2763 vendor_id[VENDOR_LENGTH-1] = '\0'; 2764 2765 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n" 2766 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n" 2767 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n" 2768 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2769 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n" 2770 "Supports: ", 2771 brand, 2772 vendor_id, 2773 family, 2774 extended_cpu_family(), 2775 model, 2776 extended_cpu_model(), 2777 cpu_stepping(), 2778 _cpuid_info.std_cpuid1_eax.bits.ext_family, 2779 _cpuid_info.std_cpuid1_eax.bits.ext_model, 2780 _cpuid_info.std_cpuid1_eax.bits.proc_type, 2781 _cpuid_info.std_cpuid1_eax.value, 2782 _cpuid_info.std_cpuid1_ebx.value, 2783 _cpuid_info.std_cpuid1_ecx.value, 2784 _cpuid_info.std_cpuid1_edx.value, 2785 _cpuid_info.ext_cpuid1_eax, 2786 _cpuid_info.ext_cpuid1_ebx, 2787 _cpuid_info.ext_cpuid1_ecx, 2788 _cpuid_info.ext_cpuid1_edx); 2789 2790 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) { 2791 if (buf_len > 0) { buf[buf_len-1] = '\0'; } 2792 return OS_ERR; 2793 } 2794 2795 cpu_write_support_string(&buf[outputLen], buf_len - outputLen); 2796 2797 return OS_OK; 2798 } 2799 2800 2801 // Fill in Abstract_VM_Version statics 2802 void VM_Version::initialize_cpu_information() { 2803 assert(_vm_version_initialized, "should have initialized VM_Version long ago"); 2804 assert(!_initialized, "shouldn't be initialized yet"); 2805 resolve_cpu_information_details(); 2806 2807 // initialize cpu_name and cpu_desc 2808 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE); 2809 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); 2810 _initialized = true; 2811 } 2812 2813 /** 2814 * For information about extracting the frequency from the cpu brand string, please see: 2815 * 2816 * Intel Processor Identification and the CPUID Instruction 2817 * Application Note 485 2818 * May 2012 2819 * 2820 * The return value is the frequency in Hz. 2821 */ 2822 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) { 2823 const char* const brand_string = cpu_brand_string(); 2824 if (brand_string == NULL) { 2825 return 0; 2826 } 2827 const int64_t MEGA = 1000000; 2828 int64_t multiplier = 0; 2829 int64_t frequency = 0; 2830 uint8_t idx = 0; 2831 // The brand string buffer is at most 48 bytes. 2832 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y. 2833 for (; idx < 48-2; ++idx) { 2834 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits. 2835 // Search brand string for "yHz" where y is M, G, or T. 2836 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') { 2837 if (brand_string[idx] == 'M') { 2838 multiplier = MEGA; 2839 } else if (brand_string[idx] == 'G') { 2840 multiplier = MEGA * 1000; 2841 } else if (brand_string[idx] == 'T') { 2842 multiplier = MEGA * MEGA; 2843 } 2844 break; 2845 } 2846 } 2847 if (multiplier > 0) { 2848 // Compute frequency (in Hz) from brand string. 2849 if (brand_string[idx-3] == '.') { // if format is "x.xx" 2850 frequency = (brand_string[idx-4] - '0') * multiplier; 2851 frequency += (brand_string[idx-2] - '0') * multiplier / 10; 2852 frequency += (brand_string[idx-1] - '0') * multiplier / 100; 2853 } else { // format is "xxxx" 2854 frequency = (brand_string[idx-4] - '0') * 1000; 2855 frequency += (brand_string[idx-3] - '0') * 100; 2856 frequency += (brand_string[idx-2] - '0') * 10; 2857 frequency += (brand_string[idx-1] - '0'); 2858 frequency *= multiplier; 2859 } 2860 } 2861 return frequency; 2862 } 2863 2864 2865 int64_t VM_Version::maximum_qualified_cpu_frequency(void) { 2866 if (_max_qualified_cpu_frequency == 0) { 2867 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string(); 2868 } 2869 return _max_qualified_cpu_frequency; 2870 } 2871 2872 uint64_t VM_Version::feature_flags() { 2873 uint64_t result = 0; 2874 if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0) 2875 result |= CPU_CX8; 2876 if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0) 2877 result |= CPU_CMOV; 2878 if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0) 2879 result |= CPU_FLUSH; 2880 #ifdef _LP64 2881 // clflush should always be available on x86_64 2882 // if not we are in real trouble because we rely on it 2883 // to flush the code cache. 2884 assert ((result & CPU_FLUSH) != 0, "clflush should be available"); 2885 #endif 2886 if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() && 2887 _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0)) 2888 result |= CPU_FXSR; 2889 // HT flag is set for multi-core processors also. 2890 if (threads_per_core() > 1) 2891 result |= CPU_HT; 2892 if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() && 2893 _cpuid_info.ext_cpuid1_edx.bits.mmx != 0)) 2894 result |= CPU_MMX; 2895 if (_cpuid_info.std_cpuid1_edx.bits.sse != 0) 2896 result |= CPU_SSE; 2897 if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0) 2898 result |= CPU_SSE2; 2899 if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0) 2900 result |= CPU_SSE3; 2901 if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0) 2902 result |= CPU_SSSE3; 2903 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0) 2904 result |= CPU_SSE4_1; 2905 if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0) 2906 result |= CPU_SSE4_2; 2907 if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0) 2908 result |= CPU_POPCNT; 2909 if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 && 2910 _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 && 2911 _cpuid_info.xem_xcr0_eax.bits.sse != 0 && 2912 _cpuid_info.xem_xcr0_eax.bits.ymm != 0) { 2913 result |= CPU_AVX; 2914 result |= CPU_VZEROUPPER; 2915 if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0) 2916 result |= CPU_F16C; 2917 if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0) 2918 result |= CPU_AVX2; 2919 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 && 2920 _cpuid_info.xem_xcr0_eax.bits.opmask != 0 && 2921 _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 && 2922 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) { 2923 result |= CPU_AVX512F; 2924 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0) 2925 result |= CPU_AVX512CD; 2926 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0) 2927 result |= CPU_AVX512DQ; 2928 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0) 2929 result |= CPU_AVX512_IFMA; 2930 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0) 2931 result |= CPU_AVX512PF; 2932 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0) 2933 result |= CPU_AVX512ER; 2934 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0) 2935 result |= CPU_AVX512BW; 2936 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0) 2937 result |= CPU_AVX512VL; 2938 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0) 2939 result |= CPU_AVX512_VPOPCNTDQ; 2940 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0) 2941 result |= CPU_AVX512_VPCLMULQDQ; 2942 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0) 2943 result |= CPU_AVX512_VAES; 2944 if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0) 2945 result |= CPU_GFNI; 2946 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0) 2947 result |= CPU_AVX512_VNNI; 2948 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0) 2949 result |= CPU_AVX512_BITALG; 2950 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0) 2951 result |= CPU_AVX512_VBMI; 2952 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0) 2953 result |= CPU_AVX512_VBMI2; 2954 } 2955 } 2956 if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0) 2957 result |= CPU_HV; 2958 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0) 2959 result |= CPU_BMI1; 2960 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0) 2961 result |= CPU_TSC; 2962 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0) 2963 result |= CPU_TSCINV_BIT; 2964 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0) 2965 result |= CPU_AES; 2966 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0) 2967 result |= CPU_ERMS; 2968 if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0) 2969 result |= CPU_FSRM; 2970 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0) 2971 result |= CPU_CLMUL; 2972 if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0) 2973 result |= CPU_RTM; 2974 if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0) 2975 result |= CPU_ADX; 2976 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0) 2977 result |= CPU_BMI2; 2978 if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0) 2979 result |= CPU_SHA; 2980 if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0) 2981 result |= CPU_FMA; 2982 if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0) 2983 result |= CPU_FLUSHOPT; 2984 if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0) 2985 result |= CPU_RDTSCP; 2986 if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0) 2987 result |= CPU_RDPID; 2988 2989 // AMD|Hygon features. 2990 if (is_amd_family()) { 2991 if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) || 2992 (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0)) 2993 result |= CPU_3DNOW_PREFETCH; 2994 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) 2995 result |= CPU_LZCNT; 2996 if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0) 2997 result |= CPU_SSE4A; 2998 } 2999 3000 // Intel features. 3001 if (is_intel()) { 3002 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3003 result |= CPU_LZCNT; 3004 } 3005 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3006 result |= CPU_3DNOW_PREFETCH; 3007 } 3008 if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) { 3009 result |= CPU_CLWB; 3010 } 3011 if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0) 3012 result |= CPU_SERIALIZE; 3013 } 3014 3015 // ZX features. 3016 if (is_zx()) { 3017 if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) { 3018 result |= CPU_LZCNT; 3019 } 3020 if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) { 3021 result |= CPU_3DNOW_PREFETCH; 3022 } 3023 } 3024 3025 // Protection key features. 3026 if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) { 3027 result |= CPU_PKU; 3028 } 3029 if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) { 3030 result |= CPU_OSPKE; 3031 } 3032 3033 // Control flow enforcement (CET) features. 3034 if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) { 3035 result |= CPU_CET_SS; 3036 } 3037 if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) { 3038 result |= CPU_CET_IBT; 3039 } 3040 3041 // Composite features. 3042 if (supports_tscinv_bit() && 3043 ((is_amd_family() && !is_amd_Barcelona()) || 3044 is_intel_tsc_synched_at_init())) { 3045 result |= CPU_TSCINV; 3046 } 3047 3048 return result; 3049 } 3050 3051 bool VM_Version::os_supports_avx_vectors() { 3052 bool retVal = false; 3053 int nreg = 2 LP64_ONLY(+2); 3054 if (supports_evex()) { 3055 // Verify that OS save/restore all bits of EVEX registers 3056 // during signal processing. 3057 retVal = true; 3058 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3059 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3060 retVal = false; 3061 break; 3062 } 3063 } 3064 } else if (supports_avx()) { 3065 // Verify that OS save/restore all bits of AVX registers 3066 // during signal processing. 3067 retVal = true; 3068 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register 3069 if (_cpuid_info.ymm_save[i] != ymm_test_value()) { 3070 retVal = false; 3071 break; 3072 } 3073 } 3074 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen 3075 if (retVal == false) { 3076 // Verify that OS save/restore all bits of EVEX registers 3077 // during signal processing. 3078 retVal = true; 3079 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register 3080 if (_cpuid_info.zmm_save[i] != ymm_test_value()) { 3081 retVal = false; 3082 break; 3083 } 3084 } 3085 } 3086 } 3087 return retVal; 3088 } 3089 3090 uint VM_Version::cores_per_cpu() { 3091 uint result = 1; 3092 if (is_intel()) { 3093 bool supports_topology = supports_processor_topology(); 3094 if (supports_topology) { 3095 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3096 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3097 } 3098 if (!supports_topology || result == 0) { 3099 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3100 } 3101 } else if (is_amd_family()) { 3102 result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1); 3103 } else if (is_zx()) { 3104 bool supports_topology = supports_processor_topology(); 3105 if (supports_topology) { 3106 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus / 3107 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3108 } 3109 if (!supports_topology || result == 0) { 3110 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1); 3111 } 3112 } 3113 return result; 3114 } 3115 3116 uint VM_Version::threads_per_core() { 3117 uint result = 1; 3118 if (is_intel() && supports_processor_topology()) { 3119 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3120 } else if (is_zx() && supports_processor_topology()) { 3121 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus; 3122 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) { 3123 if (cpu_family() >= 0x17) { 3124 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1; 3125 } else { 3126 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu / 3127 cores_per_cpu(); 3128 } 3129 } 3130 return (result == 0 ? 1 : result); 3131 } 3132 3133 intx VM_Version::L1_line_size() { 3134 intx result = 0; 3135 if (is_intel()) { 3136 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3137 } else if (is_amd_family()) { 3138 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size; 3139 } else if (is_zx()) { 3140 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1); 3141 } 3142 if (result < 32) // not defined ? 3143 result = 32; // 32 bytes by default on x86 and other x64 3144 return result; 3145 } 3146 3147 bool VM_Version::is_intel_tsc_synched_at_init() { 3148 if (is_intel_family_core()) { 3149 uint32_t ext_model = extended_cpu_model(); 3150 if (ext_model == CPU_MODEL_NEHALEM_EP || 3151 ext_model == CPU_MODEL_WESTMERE_EP || 3152 ext_model == CPU_MODEL_SANDYBRIDGE_EP || 3153 ext_model == CPU_MODEL_IVYBRIDGE_EP) { 3154 // <= 2-socket invariant tsc support. EX versions are usually used 3155 // in > 2-socket systems and likely don't synchronize tscs at 3156 // initialization. 3157 // Code that uses tsc values must be prepared for them to arbitrarily 3158 // jump forward or backward. 3159 return true; 3160 } 3161 } 3162 return false; 3163 } 3164 3165 intx VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) { 3166 // Hardware prefetching (distance/size in bytes): 3167 // Pentium 3 - 64 / 32 3168 // Pentium 4 - 256 / 128 3169 // Athlon - 64 / 32 ???? 3170 // Opteron - 128 / 64 only when 2 sequential cache lines accessed 3171 // Core - 128 / 64 3172 // 3173 // Software prefetching (distance in bytes / instruction with best score): 3174 // Pentium 3 - 128 / prefetchnta 3175 // Pentium 4 - 512 / prefetchnta 3176 // Athlon - 128 / prefetchnta 3177 // Opteron - 256 / prefetchnta 3178 // Core - 256 / prefetchnta 3179 // It will be used only when AllocatePrefetchStyle > 0 3180 3181 if (is_amd_family()) { // AMD | Hygon 3182 if (supports_sse2()) { 3183 return 256; // Opteron 3184 } else { 3185 return 128; // Athlon 3186 } 3187 } else { // Intel 3188 if (supports_sse3() && cpu_family() == 6) { 3189 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus 3190 return 192; 3191 } else if (use_watermark_prefetch) { // watermark prefetching on Core 3192 #ifdef _LP64 3193 return 384; 3194 #else 3195 return 320; 3196 #endif 3197 } 3198 } 3199 if (supports_sse2()) { 3200 if (cpu_family() == 6) { 3201 return 256; // Pentium M, Core, Core2 3202 } else { 3203 return 512; // Pentium 4 3204 } 3205 } else { 3206 return 128; // Pentium 3 (and all other old CPUs) 3207 } 3208 } 3209 }