/*
 * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "jvm.h"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "code/codeBlob.hpp"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

#include OS_HEADER_INLINE(os)

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = 0;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = 0;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
}
static get_cpu_info_stub_t get_cpu_info_stub = NULL;
static detect_virt_stub_t detect_virt_stub = NULL;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
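  // The assert below encodes that free pass: while Universe is still
  // initializing the check is vacuous; once initialization completes, the
  // CPU_FLUSH feature bit recorded by get_processor_features() must be set.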
  assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
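    // rax still holds the highest supported standard leaf from cpuid(0);
    // if it does not reach 0xB, skip topology enumeration and fall through
    // to the deterministic cache parameters path below.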
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
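    // std_cpuid0 recorded the highest supported standard leaf; jump ahead
    // to the extended leaves when leaf 7 exceeds it.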
    __ jccb(Assembler::greater, ext_cpuid);

    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // continue if OS has enabled SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of YMM/ZMM
    // registers are not restored after signal processing.
    // Generate a SEGV here (reference through NULL)
    // and check the upper YMM/ZMM bits after it.
    //
    intx saved_useavx = UseAVX;
    intx saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7_ebx bit 16: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.
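    // The VM's SEGV handler resumes execution here (at _cpuinfo_cont_addr),
    // so the registers below hold whatever YMM/ZMM state the OS restored;
    // os_supports_avx_vectors() later compares the saved values against
    // ymm_test_value() to detect lost upper bits.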

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = feature_flags();
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  _supports_cx8 = supports_cmpxchg8();
  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why Icache line size is hard coded in ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use
  // clflush instruction in 32-bit VM since it could be running
  // on CPU which does not support it.
  //
  // The only thing we can do is to verify that flushed
  // ICache::line_size has correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
  // that is only implemented on x86_64 and only if the OS plays ball
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif
  // If the OS doesn't support SSE, we can't use this feature even if the HW does
  if (!os::supports_sse())
    _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
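    // (Older Skylake server parts suffer heavy frequency throttling when
    //  512-bit instructions execute, so AVX2 is the safer default there.)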
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }
  if (UseAVX > use_avx_limit) {
    warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit);
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  } else if (UseAVX < 0) {
    warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX);
    FLAG_SET_DEFAULT(UseAVX, 0);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
  }

  if (UseAVX < 2)
    _features &= ~CPU_AVX2;

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[512];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires. I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE < 0) {
    warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE);
    FLAG_SET_DEFAULT(UseSSE, 0);
  }

  // Use AES instructions if available.
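  // The AES flags form a dependency chain checked below: UseAES gates the
  // AES-NI instructions, UseAESIntrinsics additionally needs SSE3+, and
  // UseAESCTRIntrinsics additionally needs SSE4.1+.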
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
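  // CLMUL (the PCLMULQDQ carry-less multiply instruction) is a prerequisite
  // for the CRC32, CRC32C and GHASH intrinsics configured below.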
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsic requires EVEX instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (!supports_rtm() && UseRTMLocking) {
    vm_exit_during_initialization("RTM instructions are not available on this CPU");
  }

#if INCLUDE_RTM_OPT
  if (UseRTMLocking) {
    if (!CompilerConfig::is_c2_enabled()) {
      // Only C2 does RTM locking optimization.
      vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
    }
    if (is_intel_family_core()) {
      if ((_model == CPU_MODEL_HASWELL_E3) ||
          (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
          (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
        // currently a collision between SKL and HSW_E3
        if (!UnlockExperimentalVMOptions && UseAVX < 3) {
          vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
                                        "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
        } else {
          warning("UseRTMLocking is only available as experimental option on this platform.");
        }
      }
    }
    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
      // RTM locking should be used only for applications with
      // high lock contention. For now we do not use it by default.
      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
    }
  } else { // !UseRTMLocking
    if (UseRTMForStackLocks) {
      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
      }
      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
    }
    if (UseRTMDeopt) {
      FLAG_SET_DEFAULT(UseRTMDeopt, false);
    }
    if (PrintPreciseRTMLockingStatistics) {
      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
    }
  }
#else
  if (UseRTMLocking) {
    // Only C2 does RTM locking optimization.
    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
  }
#endif

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
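    // (On 64-bit this branch is unreachable: UseSSE was already raised to
    //  at least 2 earlier in this function.)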
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 bytes vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 bytes vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handle:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
32-bit VM"); 1359 } 1360 FLAG_SET_DEFAULT(UseMulAddIntrinsic, false); 1361 } 1362 #endif // _LP64 1363 #endif // COMPILER2_OR_JVMCI 1364 1365 // On new cpus instructions which update whole XMM register should be used 1366 // to prevent partial register stall due to dependencies on high half. 1367 // 1368 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem) 1369 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem) 1370 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm). 1371 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm). 1372 1373 1374 if (is_zx()) { // ZX cpus specific settings 1375 if (FLAG_IS_DEFAULT(UseStoreImmI16)) { 1376 UseStoreImmI16 = false; // don't use it on ZX cpus 1377 } 1378 if ((cpu_family() == 6) || (cpu_family() == 7)) { 1379 if (FLAG_IS_DEFAULT(UseAddressNop)) { 1380 // Use it on all ZX cpus 1381 UseAddressNop = true; 1382 } 1383 } 1384 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) { 1385 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus 1386 } 1387 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) { 1388 if (supports_sse3()) { 1389 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus 1390 } else { 1391 UseXmmRegToRegMoveAll = false; 1392 } 1393 } 1394 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus 1395 #ifdef COMPILER2 1396 if (FLAG_IS_DEFAULT(MaxLoopPad)) { 1397 // For new ZX cpus do the next optimization: 1398 // don't align the beginning of a loop if there are enough instructions 1399 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp) 1400 // in current fetch line (OptoLoopAlignment) or the padding 1401 // is big (> MaxLoopPad). 1402 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of 1403 // generated NOP instructions. 11 is the largest size of one 1404 // address NOP instruction '0F 1F' (see Assembler::nop(i)). 1405 MaxLoopPad = 11; 1406 } 1407 #endif // COMPILER2 1408 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) { 1409 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus 1410 } 1411 if (supports_sse4_2()) { // new ZX cpus 1412 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1413 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus 1414 } 1415 } 1416 if (supports_sse4_2()) { 1417 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) { 1418 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true); 1419 } 1420 } else { 1421 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) { 1422 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled."); 1423 } 1424 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1425 } 1426 } 1427 1428 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1429 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1430 } 1431 } 1432 1433 if (is_amd_family()) { // AMD cpus specific settings 1434 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) { 1435 // Use it on new AMD cpus starting from Opteron. 1436 UseAddressNop = true; 1437 } 1438 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) { 1439 // Use it on new AMD cpus starting from Opteron. 
      UseNewLongLShift = true;
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      if (supports_sse4a()) {
        UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
      } else {
        UseXmmLoadAndClearUpper = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse4a()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2F)) {
      if (supports_sse4a()) {
        UseXmmI2F = true;
      } else {
        UseXmmI2F = false;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmI2D)) {
      if (supports_sse4a()) {
        UseXmmI2D = true;
      } else {
        UseXmmI2D = false;
      }
    }
    if (supports_sse4_2()) {
      if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
        FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
      }
    } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
    }

    // some defaults for AMD family 15h
    if (cpu_family() == 0x15) {
      // On family 15h processors default is no sw prefetch
      if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
        FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
      }
      // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
      if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
      }
      // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
    }

#ifdef COMPILER2
    if (cpu_family() < 0x17 && MaxVectorSize > 16) {
      // Limit vectors size to 16 bytes on AMD cpus < 17h.
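      // (Note: pre-Zen AMD cores split 256-bit AVX operations into two
      //  128-bit halves, so vectors wider than 16 bytes bring no benefit
      //  there; this is the presumed rationale for the cap.)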
      FLAG_SET_DEFAULT(MaxVectorSize, 16);
    }
#endif // COMPILER2

    // Some defaults for AMD family >= 17h && Hygon family 18h
    if (cpu_family() >= 0x17) {
      // On family >=17h processors use XMM and UnalignedLoadStores
      // for Array Copy
      if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
      }
      if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
        FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
      }
#ifdef COMPILER2
      if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
        FLAG_SET_DEFAULT(UseFPUForSpilling, true);
      }
#endif
    }
  }

  if (is_intel()) { // Intel cpus specific settings
    if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
      UseStoreImmI16 = false; // don't use it on Intel cpus
    }
    if (cpu_family() == 6 || cpu_family() == 15) {
      if (FLAG_IS_DEFAULT(UseAddressNop)) {
        // Use it on all Intel cpus starting from PentiumPro
        UseAddressNop = true;
      }
    }
    if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
      UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
    }
    if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
      if (supports_sse3()) {
        UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
      } else {
        UseXmmRegToRegMoveAll = false;
      }
    }
    if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
#ifdef COMPILER2
      if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus apply the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
        // in current fetch line (OptoLoopAlignment) or the padding
        // is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
        MaxLoopPad = 11;
      }
#endif // COMPILER2

      if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
        UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
      }
      if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
        }
      }
      if (supports_sse4_2()) {
        if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
          FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
        }
      } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
Intrinsics will be disabled."); 1575 } 1576 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false); 1577 } 1578 } 1579 if (is_atom_family() || is_knights_family()) { 1580 #ifdef COMPILER2 1581 if (FLAG_IS_DEFAULT(OptoScheduling)) { 1582 OptoScheduling = true; 1583 } 1584 #endif 1585 if (supports_sse4_2()) { // Silvermont 1586 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) { 1587 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus 1588 } 1589 } 1590 if (FLAG_IS_DEFAULT(UseIncDec)) { 1591 FLAG_SET_DEFAULT(UseIncDec, false); 1592 } 1593 } 1594 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) { 1595 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3); 1596 } 1597 #ifdef COMPILER2 1598 if (UseAVX > 2) { 1599 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) || 1600 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) && 1601 ArrayOperationPartialInlineSize != 0 && 1602 ArrayOperationPartialInlineSize != 16 && 1603 ArrayOperationPartialInlineSize != 32 && 1604 ArrayOperationPartialInlineSize != 64)) { 1605 int inline_size = 0; 1606 if (MaxVectorSize >= 64 && AVX3Threshold == 0) { 1607 inline_size = 64; 1608 } else if (MaxVectorSize >= 32) { 1609 inline_size = 32; 1610 } else if (MaxVectorSize >= 16) { 1611 inline_size = 16; 1612 } 1613 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) { 1614 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size); 1615 } 1616 ArrayOperationPartialInlineSize = inline_size; 1617 } 1618 1619 if (ArrayOperationPartialInlineSize > MaxVectorSize) { 1620 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0; 1621 if (ArrayOperationPartialInlineSize) { 1622 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize); 1623 } else { 1624 warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize); 1625 } 1626 } 1627 } 1628 #endif 1629 } 1630 1631 #ifdef COMPILER2 1632 if (FLAG_IS_DEFAULT(OptimizeFill)) { 1633 if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) { 1634 OptimizeFill = false; 1635 } 1636 } 1637 #endif 1638 1639 #ifdef _LP64 1640 if (UseSSE42Intrinsics) { 1641 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1642 UseVectorizedMismatchIntrinsic = true; 1643 } 1644 } else if (UseVectorizedMismatchIntrinsic) { 1645 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) 1646 warning("vectorizedMismatch intrinsics are not available on this CPU"); 1647 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1648 } 1649 #else 1650 if (UseVectorizedMismatchIntrinsic) { 1651 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) { 1652 warning("vectorizedMismatch intrinsic is not available in 32-bit VM"); 1653 } 1654 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); 1655 } 1656 #endif // _LP64 1657 1658 // Use count leading zeros count instruction if available. 1659 if (supports_lzcnt()) { 1660 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { 1661 UseCountLeadingZerosInstruction = true; 1662 } 1663 } else if (UseCountLeadingZerosInstruction) { 1664 warning("lzcnt instruction is not available on this CPU"); 1665 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false); 1666 } 1667 1668 // Use count trailing zeros instruction if available 1669 if (supports_bmi1()) { 1670 // tzcnt does not require VEX prefix 1671 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { 1672 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) { 1673 // Don't use tzcnt if BMI1 is switched off on command line. 
1674         UseCountTrailingZerosInstruction = false;
1675       } else {
1676         UseCountTrailingZerosInstruction = true;
1677       }
1678     }
1679   } else if (UseCountTrailingZerosInstruction) {
1680     warning("tzcnt instruction is not available on this CPU");
1681     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1682   }
1683
1684   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1685   // The VEX prefix is generated only when AVX > 0.
1686   if (supports_bmi1() && supports_avx()) {
1687     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1688       UseBMI1Instructions = true;
1689     }
1690   } else if (UseBMI1Instructions) {
1691     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1692     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1693   }
1694
1695   if (supports_bmi2() && supports_avx()) {
1696     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1697       UseBMI2Instructions = true;
1698     }
1699   } else if (UseBMI2Instructions) {
1700     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1701     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1702   }
1703
1704   // Use population count instruction if available.
1705   if (supports_popcnt()) {
1706     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1707       UsePopCountInstruction = true;
1708     }
1709   } else if (UsePopCountInstruction) {
1710     warning("POPCNT instruction is not available on this CPU");
1711     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1712   }
1713
1714   // Use fast-string operations if available.
1715   if (supports_erms()) {
1716     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1717       UseFastStosb = true;
1718     }
1719   } else if (UseFastStosb) {
1720     warning("fast-string operations are not available on this CPU");
1721     FLAG_SET_DEFAULT(UseFastStosb, false);
1722   }
1723
1724   // For AMD processors use XMM/YMM MOVDQU instructions
1725   // for Object Initialization by default.
1726   if (is_amd() && cpu_family() >= 0x19) {
1727     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1728       UseFastStosb = false;
1729     }
1730   }
1731
1732 #ifdef COMPILER2
1733   if (is_intel() && MaxVectorSize > 16) {
1734     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1735       UseFastStosb = false;
1736     }
1737   }
1738 #endif
1739
1740   // Use XMM/YMM MOVDQU instruction for Object Initialization.
1741   if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1742     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1743       UseXMMForObjInit = true;
1744     }
1745   } else if (UseXMMForObjInit) {
1746     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1747     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1748   }
1749
1750 #ifdef COMPILER2
1751   if (FLAG_IS_DEFAULT(AlignVector)) {
1752     // Modern processors allow misaligned memory operations for vectors.
1753     AlignVector = !UseUnalignedLoadStores;
1754   }
1755 #endif // COMPILER2
1756
1757   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1758     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1759       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1760     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1761       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1762     }
1763   }
1764
1765   // Allocation prefetch settings
1766   intx cache_line_size = prefetch_data_size();
1767   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1768       (cache_line_size > AllocatePrefetchStepSize)) {
1769     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1770   }
1771
1772   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1773     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1774     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1775       warning("AllocatePrefetchDistance is set to 0, which disables prefetching. Ignoring the AllocatePrefetchStyle flag.");
1776     }
1777     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1778   }
1779
1780   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1781     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1782     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1783   }
1784
1785   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1786     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1787         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1788       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1789     }
1790 #ifdef COMPILER2
1791     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1792       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1793     }
1794 #endif
1795   }
1796
1797   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1798 #ifdef COMPILER2
1799     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1800       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1801     }
1802 #endif
1803   }
1804
1805 #ifdef _LP64
1806   // Prefetch settings
1807
1808   // Prefetch interval for gc copy/scan == 9 dcache lines (576 bytes = 9 * 64). Derived from
1809   // 50-warehouse SPECjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
1810   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1811   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1812
1813   // gc copy/scan is disabled if prefetchw isn't supported, because
1814   // Prefetch::write emits an inlined prefetchw on Linux.
1815   // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1816   // The prefetcht0 instruction used instead works on both amd64 and em64t.
1817
1818   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1819     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1820   }
1821   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1822     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1823   }
1824 #endif
1825
1826   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1827       (cache_line_size > ContendedPaddingWidth))
1828     ContendedPaddingWidth = cache_line_size;
1829
1830   // This machine allows unaligned memory accesses
1831   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1832     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1833   }
1834
1835 #ifndef PRODUCT
1836   if (log_is_enabled(Info, os, cpu)) {
1837     LogStream ls(Log(os, cpu)::info());
1838     outputStream* log = &ls;
1839     log->print_cr("Logical CPUs per core: %u",
1840                   logical_processors_per_package());
1841     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1842     log->print("UseSSE=%d", (int) UseSSE);
1843     if (UseAVX > 0) {
1844       log->print("  UseAVX=%d", (int) UseAVX);
1845     }
1846     if (UseAES) {
1847       log->print("  UseAES=1");
1848     }
1849 #ifdef COMPILER2
1850     if (MaxVectorSize > 0) {
1851       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1852     }
1853 #endif
1854     log->cr();
1855     log->print("Allocation");
1856     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1857       log->print_cr(": no prefetching");
1858     } else {
1859       log->print(" prefetching: ");
1860       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1861         log->print("PREFETCHW");
1862       } else if (UseSSE >= 1) {
1863         if (AllocatePrefetchInstr == 0) {
1864           log->print("PREFETCHNTA");
1865         } else if (AllocatePrefetchInstr == 1) {
1866           log->print("PREFETCHT0");
1867         } else if (AllocatePrefetchInstr == 2) {
1868           log->print("PREFETCHT2");
1869         } else if (AllocatePrefetchInstr == 3) {
1870           log->print("PREFETCHW");
1871         }
1872       }
1873       if (AllocatePrefetchLines > 1) {
1874         log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1875       } else {
1876         log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1877       }
1878     }
1879
1880     if (PrefetchCopyIntervalInBytes > 0) {
1881       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1882     }
1883     if (PrefetchScanIntervalInBytes > 0) {
1884       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1885     }
1886     if (ContendedPaddingWidth > 0) {
1887       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1888     }
1889   }
1890 #endif // !PRODUCT
1891   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1892     FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1893   }
1894   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1895     FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1896   }
1897 }
1898
1899 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1900   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1901   if (vrt == XenHVM) {
1902     st->print_cr("Xen hardware-assisted virtualization detected");
1903   } else if (vrt == KVM) {
1904     st->print_cr("KVM virtualization detected");
1905   } else if (vrt == VMWare) {
1906     st->print_cr("VMWare virtualization detected");
1907     VirtualizationSupport::print_virtualization_info(st);
1908   } else if (vrt == HyperV) {
1909     st->print_cr("Hyper-V virtualization detected");
1910   } else if (vrt == HyperVRole) {
1911     st->print_cr("Hyper-V role detected");
1912   }
1913 }
1914
1915 bool VM_Version::compute_has_intel_jcc_erratum() {
1916   if (!is_intel_family_core()) {
1917     // Only Intel CPUs are affected.
1918     return false;
1919   }
1920   // The following table of affected CPUs is based on the following document released by Intel:
1921   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1922   switch (_model) {
1923   case 0x8E:
1924     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1925     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1926     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1927     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1928     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1929     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1930     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1931     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1932     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1933     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1934   case 0x4E:
1935     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1936     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1937     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1938     return _stepping == 0x3;
1939   case 0x55:
1940     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1941     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1942     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1943     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1944     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1945     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1946     return _stepping == 0x4 || _stepping == 0x7;
1947   case 0x5E:
1948     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1949     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1950     return _stepping == 0x3;
1951   case 0x9E:
1952     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1953     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1954     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1955     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1956     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1957     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1958     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1959     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1960     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1961     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1962     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1963     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1964     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
1965     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1966     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1967   case 0xA5:
1968     // Not in Intel documentation.
1969     // 06_A5H |   | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1970     return true;
1971   case 0xA6:
1972     // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1973     return _stepping == 0x0;
1974   case 0xAE:
1975     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1976     return _stepping == 0xA;
1977   default:
1978     // If we are running on another Intel machine not recognized in the table, we are okay.
1979     return false;
1980   }
1981 }
1982
1983 // On Xen, the cpuid instruction returns
1984 //  eax / registers[0]: Version of Xen
1985 //  ebx / registers[1]: chars 'XenV'
1986 //  ecx / registers[2]: chars 'MMXe'
1987 //  edx / registers[3]: chars 'nVMM'
1988 //
1989 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
1990 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
1991 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
1992 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
1993 //
1994 // More information:
1995 //  https://kb.vmware.com/s/article/1009458
1996 //
1997 void VM_Version::check_virtualizations() {
1998   uint32_t registers[4] = {0};
1999   char signature[13] = {0};
2000
2001   // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
2002   // from 0x40000000 up to 0x40010000.
2003   // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2004   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2005     detect_virt_stub(leaf, registers);
2006     memcpy(signature, &registers[1], 12);
2007
2008     if (strncmp("VMwareVMware", signature, 12) == 0) {
2009       Abstract_VM_Version::_detected_virtualization = VMWare;
2010       // check for extended metrics from guestlib
2011       VirtualizationSupport::initialize();
2012     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2013       Abstract_VM_Version::_detected_virtualization = HyperV;
2014 #ifdef _WINDOWS
2015       // CPUID leaf 0x40000007 is available to the root partition only.
2016       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2017       // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2018       detect_virt_stub(0x40000007, registers);
2019       if ((registers[0] != 0x0) ||
2020           (registers[1] != 0x0) ||
2021           (registers[2] != 0x0) ||
2022           (registers[3] != 0x0)) {
2023         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2024       }
2025 #endif
2026     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2027       Abstract_VM_Version::_detected_virtualization = KVM;
2028     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2029       Abstract_VM_Version::_detected_virtualization = XenHVM;
2030     }
2031   }
2032 }
2033
2034 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2035 // for implementing the array copy and clear operations.
2036 // Intel platforms that support the serialize instruction have an
2037 // improved implementation of 64-byte load/stores, so the default
2038 // threshold is set to 0 for these platforms.
2039 int VM_Version::avx3_threshold() {
2040   return (is_intel_family_core() &&
2041           supports_serialize() &&
2042           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2043 }
2044
2045 static bool _vm_version_initialized = false;
2046
2047 void VM_Version::initialize() {
2048   ResourceMark rm;
2049   // Making this stub must be the FIRST use of the assembler.
2050   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2051   if (stub_blob == NULL) {
2052     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2053   }
2054   CodeBuffer c(stub_blob);
2055   VM_Version_StubGenerator g(&c);
2056
2057   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2058                                      g.generate_get_cpu_info());
2059   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2060                                     g.generate_detect_virt());
2061
2062   get_processor_features();
2063
2064   LP64_ONLY(Assembler::precompute_instructions();)
2065
2066   if (VM_Version::supports_hv()) { // Supports hypervisor
2067     check_virtualizations();
2068   }
2069   _vm_version_initialized = true;
2070 }
2071
2072 typedef enum {
2073   CPU_FAMILY_8086_8088  = 0,
2074   CPU_FAMILY_INTEL_286  = 2,
2075   CPU_FAMILY_INTEL_386  = 3,
2076   CPU_FAMILY_INTEL_486  = 4,
2077   CPU_FAMILY_PENTIUM    = 5,
2078   CPU_FAMILY_PENTIUMPRO = 6,    // Same family, several models
2079   CPU_FAMILY_PENTIUM_4  = 0xF
2080 } FamilyFlag;
2081
2082 typedef enum {
2083   RDTSCP_FLAG  = 0x08000000, // bit 27
2084   INTEL64_FLAG = 0x20000000  // bit 29
2085 } _featureExtendedEdxFlag;
2086
2087 typedef enum {
2088   FPU_FLAG     = 0x00000001,
2089   VME_FLAG     = 0x00000002,
2090   DE_FLAG      = 0x00000004,
2091   PSE_FLAG     = 0x00000008,
2092   TSC_FLAG     = 0x00000010,
2093   MSR_FLAG     = 0x00000020,
2094   PAE_FLAG     = 0x00000040,
2095   MCE_FLAG     = 0x00000080,
2096   CX8_FLAG     = 0x00000100,
2097   APIC_FLAG    = 0x00000200,
2098   SEP_FLAG     = 0x00000800,
2099   MTRR_FLAG    = 0x00001000,
2100   PGE_FLAG     = 0x00002000,
2101   MCA_FLAG     = 0x00004000,
2102   CMOV_FLAG    = 0x00008000,
2103   PAT_FLAG     = 0x00010000,
2104   PSE36_FLAG   = 0x00020000,
2105   PSNUM_FLAG   = 0x00040000,
2106   CLFLUSH_FLAG = 0x00080000,
2107   DTS_FLAG     = 0x00200000,
2108   ACPI_FLAG    = 0x00400000,
2109   MMX_FLAG     = 0x00800000,
2110   FXSR_FLAG    = 0x01000000,
2111   SSE_FLAG     = 0x02000000,
2112   SSE2_FLAG    = 0x04000000,
2113   SS_FLAG      = 0x08000000,
2114   HTT_FLAG     = 0x10000000,
2115   TM_FLAG      = 0x20000000
2116 } FeatureEdxFlag;
2117
2118 static BufferBlob* cpuid_brand_string_stub_blob;
2119 static const int   cpuid_brand_string_stub_size = 550;
2120
2121 extern "C" {
2122   typedef void (*getCPUIDBrandString_stub_t)(void*);
2123 }
2124
2125 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL;
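// Note on how these feature masks are used (illustrative): each *_FLAG in
// FeatureEdxFlag above is a single bit of the CPUID(1) EDX register, and its
// bit index doubles as the index of the matching description string in the
// _feature_edx_id table below. For example, CMOV_FLAG == 0x00008000 is bit 15,
// and iteration fi == 15 of the first loop in cpu_write_support_string()
// prints _feature_edx_id[15], "Conditional Mov Instruction", when the CPU
// reports that bit as set.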
2126
2127 // VM_Version statics
2128 enum {
2129   ExtendedFamilyIdLength_INTEL = 16,
2130   ExtendedFamilyIdLength_AMD   = 24
2131 };
2132
2133 const size_t VENDOR_LENGTH = 13;
2134 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2135 static char* _cpu_brand_string = NULL;
2136 static int64_t _max_qualified_cpu_frequency = 0;
2137
2138 static int _no_of_threads = 0;
2139 static int _no_of_cores = 0;
2140
2141 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2142   "8086/8088",
2143   "",
2144   "286",
2145   "386",
2146   "486",
2147   "Pentium",
2148   "Pentium Pro",   // or Pentium-M/Woodcrest depending on model
2149   "",
2150   "",
2151   "",
2152   "",
2153   "",
2154   "",
2155   "",
2156   "",
2157   "Pentium 4"
2158 };
2159
2160 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2161   "",
2162   "",
2163   "",
2164   "",
2165   "5x86",
2166   "K5/K6",
2167   "Athlon/AthlonXP",
2168   "",
2169   "",
2170   "",
2171   "",
2172   "",
2173   "",
2174   "",
2175   "",
2176   "Opteron/Athlon64",
2177   "Opteron QC/Phenom",  // Barcelona et al.
2178   "",
2179   "",
2180   "",
2181   "",
2182   "",
2183   "",
2184   "Zen"
2185 };
2186 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2187 // September 2013, Vol 3C Table 35-1
2188 const char* const _model_id_pentium_pro[] = {
2189   "",
2190   "Pentium Pro",
2191   "",
2192   "Pentium II model 3",
2193   "",
2194   "Pentium II model 5/Xeon/Celeron",
2195   "Celeron",
2196   "Pentium III/Pentium III Xeon",
2197   "Pentium III/Pentium III Xeon",
2198   "Pentium M model 9",    // Yonah
2199   "Pentium III, model A",
2200   "Pentium III, model B",
2201   "",
2202   "Pentium M model D",    // Dothan
2203   "",
2204   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2205   "",
2206   "",
2207   "",
2208   "",
2209   "",
2210   "",
2211   "Celeron",              // 0x16 Celeron 65nm
2212   "Core 2",               // 0x17 Penryn / Harpertown
2213   "",
2214   "",
2215   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2216   "Atom",                 // 0x1B Z5xx series Silverthorn
2217   "",
2218   "Core 2",               // 0x1D Dunnington (6-core)
2219   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2220   "",
2221   "",
2222   "",
2223   "",
2224   "",
2225   "",
2226   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2227   "",
2228   "",
2229   "",                     // 0x28
2230   "",
2231   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2232   "",
2233   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2234   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2235   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2236   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2237   "",
2238   "",
2239   "",
2240   "",
2241   "",
2242   "",
2243   "",
2244   "",
2245   "",
2246   "",
2247   "Ivy Bridge",           // 0x3a
2248   "",
2249   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2250   "",                     // 0x3d "Next Generation Intel Core Processor"
2251   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2252   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2253   "",
2254   "",
2255   "",
2256   "",
2257   "",
2258   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2259   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2260   NULL
2261 };
2262
2263 /* Brand ID is for backward compatibility;
2264  * newer CPUs use the extended brand string. */
2265 const char* const _brand_id[] = {
2266   "",
2267   "Celeron processor",
2268   "Pentium III processor",
2269   "Intel Pentium III Xeon processor",
2270   "",
2271   "",
2272   "",
2273   "",
2274   "Intel Pentium 4 processor",
2275   NULL
2276 };
2277
2278
2279 const char* const _feature_edx_id[] = {
2280   "On-Chip FPU",
2281   "Virtual Mode Extensions",
2282   "Debugging Extensions",
2283   "Page Size Extensions",
"Page Size Extensions", 2284 "Time Stamp Counter", 2285 "Model Specific Registers", 2286 "Physical Address Extension", 2287 "Machine Check Exceptions", 2288 "CMPXCHG8B Instruction", 2289 "On-Chip APIC", 2290 "", 2291 "Fast System Call", 2292 "Memory Type Range Registers", 2293 "Page Global Enable", 2294 "Machine Check Architecture", 2295 "Conditional Mov Instruction", 2296 "Page Attribute Table", 2297 "36-bit Page Size Extension", 2298 "Processor Serial Number", 2299 "CLFLUSH Instruction", 2300 "", 2301 "Debug Trace Store feature", 2302 "ACPI registers in MSR space", 2303 "Intel Architecture MMX Technology", 2304 "Fast Float Point Save and Restore", 2305 "Streaming SIMD extensions", 2306 "Streaming SIMD extensions 2", 2307 "Self-Snoop", 2308 "Hyper Threading", 2309 "Thermal Monitor", 2310 "", 2311 "Pending Break Enable" 2312 }; 2313 2314 const char* const _feature_extended_edx_id[] = { 2315 "", 2316 "", 2317 "", 2318 "", 2319 "", 2320 "", 2321 "", 2322 "", 2323 "", 2324 "", 2325 "", 2326 "SYSCALL/SYSRET", 2327 "", 2328 "", 2329 "", 2330 "", 2331 "", 2332 "", 2333 "", 2334 "", 2335 "Execute Disable Bit", 2336 "", 2337 "", 2338 "", 2339 "", 2340 "", 2341 "", 2342 "RDTSCP", 2343 "", 2344 "Intel 64 Architecture", 2345 "", 2346 "" 2347 }; 2348 2349 const char* const _feature_ecx_id[] = { 2350 "Streaming SIMD Extensions 3", 2351 "PCLMULQDQ", 2352 "64-bit DS Area", 2353 "MONITOR/MWAIT instructions", 2354 "CPL Qualified Debug Store", 2355 "Virtual Machine Extensions", 2356 "Safer Mode Extensions", 2357 "Enhanced Intel SpeedStep technology", 2358 "Thermal Monitor 2", 2359 "Supplemental Streaming SIMD Extensions 3", 2360 "L1 Context ID", 2361 "", 2362 "Fused Multiply-Add", 2363 "CMPXCHG16B", 2364 "xTPR Update Control", 2365 "Perfmon and Debug Capability", 2366 "", 2367 "Process-context identifiers", 2368 "Direct Cache Access", 2369 "Streaming SIMD extensions 4.1", 2370 "Streaming SIMD extensions 4.2", 2371 "x2APIC", 2372 "MOVBE", 2373 "Popcount instruction", 2374 "TSC-Deadline", 2375 "AESNI", 2376 "XSAVE", 2377 "OSXSAVE", 2378 "AVX", 2379 "F16C", 2380 "RDRAND", 2381 "" 2382 }; 2383 2384 const char* const _feature_extended_ecx_id[] = { 2385 "LAHF/SAHF instruction support", 2386 "Core multi-processor legacy mode", 2387 "", 2388 "", 2389 "", 2390 "Advanced Bit Manipulations: LZCNT", 2391 "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ", 2392 "Misaligned SSE mode", 2393 "", 2394 "", 2395 "", 2396 "", 2397 "", 2398 "", 2399 "", 2400 "", 2401 "", 2402 "", 2403 "", 2404 "", 2405 "", 2406 "", 2407 "", 2408 "", 2409 "", 2410 "", 2411 "", 2412 "", 2413 "", 2414 "", 2415 "", 2416 "" 2417 }; 2418 2419 void VM_Version::initialize_tsc(void) { 2420 ResourceMark rm; 2421 2422 cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size); 2423 if (cpuid_brand_string_stub_blob == NULL) { 2424 vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub"); 2425 } 2426 CodeBuffer c(cpuid_brand_string_stub_blob); 2427 VM_Version_StubGenerator g(&c); 2428 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t, 2429 g.generate_getCPUIDBrandString()); 2430 } 2431 2432 const char* VM_Version::cpu_model_description(void) { 2433 uint32_t cpu_family = extended_cpu_family(); 2434 uint32_t cpu_model = extended_cpu_model(); 2435 const char* model = NULL; 2436 2437 if (cpu_family == CPU_FAMILY_PENTIUMPRO) { 2438 for (uint32_t i = 0; i <= cpu_model; i++) { 2439 model = _model_id_pentium_pro[i]; 2440 if (model == NULL) { 2441 break; 2442 } 2443 } 2444 } 2445 return 
2446 }
2447
2448 const char* VM_Version::cpu_brand_string(void) {
2449   if (_cpu_brand_string == NULL) {
2450     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2451     if (NULL == _cpu_brand_string) {
2452       return NULL;
2453     }
2454     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2455     if (ret_val != OS_OK) {
2456       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2457       _cpu_brand_string = NULL;
2458     }
2459   }
2460   return _cpu_brand_string;
2461 }
2462
2463 const char* VM_Version::cpu_brand(void) {
2464   const char* brand = NULL;
2465
2466   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2467     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2468     brand = _brand_id[0];
2469     for (int i = 0; brand != NULL && i <= brand_num; i += 1) {
2470       brand = _brand_id[i];
2471     }
2472   }
2473   return brand;
2474 }
2475
2476 bool VM_Version::cpu_is_em64t(void) {
2477   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2478 }
2479
2480 bool VM_Version::is_netburst(void) {
2481   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2482 }
2483
2484 bool VM_Version::supports_tscinv_ext(void) {
2485   if (!supports_tscinv_bit()) {
2486     return false;
2487   }
2488
2489   if (is_intel()) {
2490     return true;
2491   }
2492
2493   if (is_amd()) {
2494     return !is_amd_Barcelona();
2495   }
2496
2497   if (is_hygon()) {
2498     return true;
2499   }
2500
2501   return false;
2502 }
2503
2504 void VM_Version::resolve_cpu_information_details(void) {
2505
2506   // In the future we want to base this information on proper cpu
2507   // and cache topology enumeration, such as
2508   // Intel 64 Architecture Processor Topology Enumeration,
2509   // which supports system cpu and cache topology enumeration
2510   // using either x2APIC IDs or initial APIC IDs.
2511
2512   // Currently we produce only rough cpu information estimates,
2513   // which will not necessarily reflect the exact configuration of the system.
2514
2515   // This is the number of logical hardware threads
2516   // visible to the operating system.
2517   _no_of_threads = os::processor_count();
2518
2519   // Find out the number of threads per cpu package.
2520   int threads_per_package = threads_per_core() * cores_per_cpu();
2521
2522   // Use the number of threads visible to the process to estimate the number of sockets.
2523   _no_of_sockets = _no_of_threads / threads_per_package;
2524
2525   // The process might only see a subset of the total number of threads
2526   // from a single processor package, e.g. under virtualization or resource management.
2527   // If so, just report a single package.
2528   if (0 == _no_of_sockets) {
2529     _no_of_sockets = 1;
2530   }
2531
2532   // Estimate the number of cores.
2533   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2534 }
2535
2536
2537 const char* VM_Version::cpu_family_description(void) {
2538   int cpu_family_id = extended_cpu_family();
2539   if (is_amd()) {
2540     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2541       return _family_id_amd[cpu_family_id];
2542     }
2543   }
2544   if (is_intel()) {
2545     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2546       return cpu_model_description();
2547     }
2548     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2549       return _family_id_intel[cpu_family_id];
2550     }
2551   }
2552   if (is_hygon()) {
2553     return "Dhyana";
2554   }
2555   return "Unknown x86";
2556 }
2557
2558 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2559   assert(buf != NULL, "buffer is NULL!");
2560   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2561
2562   const char* cpu_type = NULL;
2563   const char* x64 = NULL;
2564
2565   if (is_intel()) {
2566     cpu_type = "Intel";
2567     x64 = cpu_is_em64t() ? " Intel64" : "";
2568   } else if (is_amd()) {
2569     cpu_type = "AMD";
2570     x64 = cpu_is_em64t() ? " AMD64" : "";
2571   } else if (is_hygon()) {
2572     cpu_type = "Hygon";
2573     x64 = cpu_is_em64t() ? " AMD64" : "";
2574   } else {
2575     cpu_type = "Unknown x86";
2576     x64 = cpu_is_em64t() ? " x86_64" : "";
2577   }
2578
2579   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2580     cpu_type,
2581     cpu_family_description(),
2582     supports_ht() ? " (HT)" : "",
2583     supports_sse3() ? " SSE3" : "",
2584     supports_ssse3() ? " SSSE3" : "",
2585     supports_sse4_1() ? " SSE4.1" : "",
2586     supports_sse4_2() ? " SSE4.2" : "",
2587     supports_sse4a() ? " SSE4A" : "",
2588     is_netburst() ? " Netburst" : "",
2589     is_intel_family_core() ? " Core" : "",
" Core" : "", 2590 x64); 2591 2592 return OS_OK; 2593 } 2594 2595 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) { 2596 assert(buf != NULL, "buffer is NULL!"); 2597 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!"); 2598 assert(getCPUIDBrandString_stub != NULL, "not initialized"); 2599 2600 // invoke newly generated asm code to fetch CPU Brand String 2601 getCPUIDBrandString_stub(&_cpuid_info); 2602 2603 // fetch results into buffer 2604 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0; 2605 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1; 2606 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2; 2607 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3; 2608 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4; 2609 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5; 2610 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6; 2611 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7; 2612 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8; 2613 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9; 2614 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10; 2615 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11; 2616 2617 return OS_OK; 2618 } 2619 2620 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) { 2621 guarantee(buf != NULL, "buffer is NULL!"); 2622 guarantee(buf_len > 0, "buffer len not enough!"); 2623 2624 unsigned int flag = 0; 2625 unsigned int fi = 0; 2626 size_t written = 0; 2627 const char* prefix = ""; 2628 2629 #define WRITE_TO_BUF(string) \ 2630 { \ 2631 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \ 2632 if (res < 0) { \ 2633 return buf_len - 1; \ 2634 } \ 2635 written += res; \ 2636 if (prefix[0] == '\0') { \ 2637 prefix = ", "; \ 2638 } \ 2639 } 2640 2641 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2642 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) { 2643 continue; /* no hyperthreading */ 2644 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) { 2645 continue; /* no fast system call */ 2646 } 2647 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) { 2648 WRITE_TO_BUF(_feature_edx_id[fi]); 2649 } 2650 } 2651 2652 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2653 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) { 2654 WRITE_TO_BUF(_feature_ecx_id[fi]); 2655 } 2656 } 2657 2658 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) { 2659 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) { 2660 WRITE_TO_BUF(_feature_extended_ecx_id[fi]); 2661 } 2662 } 2663 2664 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) { 2665 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) { 2666 WRITE_TO_BUF(_feature_extended_edx_id[fi]); 2667 } 2668 } 2669 2670 if (supports_tscinv_bit()) { 2671 WRITE_TO_BUF("Invariant TSC"); 2672 } 2673 2674 return written; 2675 } 2676 2677 /** 2678 * Write a detailed description of the cpu to a given buffer, including 2679 * feature set. 
2680  */
2681 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2682   assert(buf != NULL, "buffer is NULL!");
2683   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2684
2685   static const char* unknown = "<unknown>";
2686   char vendor_id[VENDOR_LENGTH];
2687   const char* family = NULL;
2688   const char* model = NULL;
2689   const char* brand = NULL;
2690   int outputLen = 0;
2691
2692   family = cpu_family_description();
2693   if (family == NULL) {
2694     family = unknown;
2695   }
2696
2697   model = cpu_model_description();
2698   if (model == NULL) {
2699     model = unknown;
2700   }
2701
2702   brand = cpu_brand_string();
2703
2704   if (brand == NULL) {
2705     brand = cpu_brand();
2706     if (brand == NULL) {
2707       brand = unknown;
2708     }
2709   }
2710
2711   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0; // ebx
2712   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2; // edx (vendor string order is ebx, edx, ecx)
2713   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1; // ecx
2714   vendor_id[VENDOR_LENGTH-1] = '\0';
2715
2716   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2717     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2718     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2719     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2720     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2721     "Supports: ",
2722     brand,
2723     vendor_id,
2724     family,
2725     extended_cpu_family(),
2726     model,
2727     extended_cpu_model(),
2728     cpu_stepping(),
2729     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2730     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2731     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2732     _cpuid_info.std_cpuid1_eax.value,
2733     _cpuid_info.std_cpuid1_ebx.value,
2734     _cpuid_info.std_cpuid1_ecx.value,
2735     _cpuid_info.std_cpuid1_edx.value,
2736     _cpuid_info.ext_cpuid1_eax,
2737     _cpuid_info.ext_cpuid1_ebx,
2738     _cpuid_info.ext_cpuid1_ecx,
2739     _cpuid_info.ext_cpuid1_edx);
2740
2741   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2742     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2743     return OS_ERR;
2744   }
2745
2746   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2747
2748   return OS_OK;
2749 }
2750
2751
2752 // Fill in Abstract_VM_Version statics
2753 void VM_Version::initialize_cpu_information() {
2754   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2755   assert(!_initialized, "shouldn't be initialized yet");
2756   resolve_cpu_information_details();
2757
2758   // initialize cpu_name and cpu_desc
2759   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2760   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2761   _initialized = true;
2762 }
2763
2764 /**
2765  * For information about extracting the frequency from the cpu brand string, please see:
2766  *
2767  *   Intel Processor Identification and the CPUID Instruction
2768  *   Application Note 485
2769  *   May 2012
2770  *
2771  * The return value is the frequency in Hz.
2772  */
2773 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2774   const char* const brand_string = cpu_brand_string();
2775   if (brand_string == NULL) {
2776     return 0;
2777   }
2778   const int64_t MEGA = 1000000;
2779   int64_t multiplier = 0;
2780   int64_t frequency = 0;
2781   uint8_t idx = 0;
2782   // The brand string buffer is at most 48 bytes.
2783   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2784   for (; idx < 48-2; ++idx) {
2785     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2786     // Search brand string for "yHz" where y is M, G, or T.
2787     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2788       if (brand_string[idx] == 'M') {
2789         multiplier = MEGA;
2790       } else if (brand_string[idx] == 'G') {
2791         multiplier = MEGA * 1000;
2792       } else if (brand_string[idx] == 'T') {
2793         multiplier = MEGA * MEGA;
2794       }
2795       break;
2796     }
2797   }
2798   if (multiplier > 0) {
2799     // Compute frequency (in Hz) from brand string.
2800     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2801       frequency  = (brand_string[idx-4] - '0') * multiplier;
2802       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2803       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2804     } else { // format is "xxxx"
2805       frequency  = (brand_string[idx-4] - '0') * 1000;
2806       frequency += (brand_string[idx-3] - '0') * 100;
2807       frequency += (brand_string[idx-2] - '0') * 10;
2808       frequency += (brand_string[idx-1] - '0');
2809       frequency *= multiplier;
2810     }
2811   }
2812   return frequency;
2813 }
2814
2815
2816 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2817   if (_max_qualified_cpu_frequency == 0) {
2818     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2819   }
2820   return _max_qualified_cpu_frequency;
2821 }
2822
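// Worked example (hypothetical brand string, for illustration only): given
// brand_string == "Intel(R) Core(TM) i7 CPU @ 2.60GHz", the scan in
// max_qualified_cpu_freq_from_brand_string() stops with idx at the 'G' of
// "GHz" (brand_string[idx+1] == 'H', brand_string[idx+2] == 'z'), so
// multiplier = 1000 * MEGA = 1000000000. Since brand_string[idx-3] == '.',
// the "x.xx" path applies:
//   frequency  = ('2' - '0') * 1000000000;       // 2000000000
//   frequency += ('6' - '0') * 1000000000 / 10;  //  600000000
//   frequency += ('0' - '0') * 1000000000 / 100; //          0
// giving 2600000000 Hz, i.e. 2.6 GHz. maximum_qualified_cpu_frequency()
// caches the result in _max_qualified_cpu_frequency, so the brand string is
// parsed at most once.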