1 /*
   2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "jvm.h"
  32 #include "logging/log.hpp"
  33 #include "logging/logStream.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "runtime/globals_extension.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/checkedCast.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_FLAG
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = 0;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = 0;
  59 
  60 static BufferBlob* stub_blob;
  61 static const int stub_size = 2000;
  62 
  63 extern "C" {
  64   typedef void (*get_cpu_info_stub_t)(void*);
  65   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  66 }
  67 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  68 static detect_virt_stub_t detect_virt_stub = nullptr;
  69 
  70 #ifdef _LP64
  71 
  72 bool VM_Version::supports_clflush() {
  73   // clflush should always be available on x86_64
  74   // if not we are in real trouble because we rely on it
  75   // to flush the code cache.
  76   // Unfortunately, Assembler::clflush is currently called as part
  77   // of generation of the code cache flush routine. This happens
  78   // under Universe::init before the processor features are set
  79   // up. Assembler::flush calls this routine to check that clflush
  80   // is allowed. So, we give the caller a free pass if Universe init
  81   // is still in progress.
  82   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  83   return true;
  84 }
  85 #endif
  86 
// CPUID leaf (function) numbers used by the stubs below.
// Standard leaves: 0x0 = vendor/max-leaf, 0x1 = family/model + feature
// flags, 0x4 = deterministic cache parameters, 0xb = processor topology.
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves: 0x80000000 = max extended leaf, 0x80000001 = extended
// feature flags, 0x80000002-0x80000004 = processor brand string,
// 0x80000007/0x80000008 = further extended capability leaves.
#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
  99 
// Generates the small assembly stubs used during VM startup to interrogate
// the processor:
//  - get_cpu_info_stub:      runs the CPUID leaves, fills a CpuidInfo struct,
//                            and probes whether the OS preserves YMM/ZMM state
//                            across a signal (via a deliberate SEGV).
//  - detect_virt_stub:       runs a single caller-chosen CPUID leaf.
//  - getCPUIDNameInfo_stub:  reads the processor brand string
//                            (leaves 0x80000002..0x80000004).
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emit the get_cpu_info stub; returns the entry point address.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    // EFLAGS.AC (bit 18): writable only on 486+; EFLAGS.ID (bit 21):
    // writable only if the CPUID instruction is supported.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    // Probe EVEX state when UseAVX is unset or the user asked for AVX > 2.
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);    // rcx keeps the original EFLAGS for comparison
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    // Store leaf 0 (vendor string + max standard leaf) into std_cpuid0.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // ECX = 1 (sub-leaf 1 of leaf 0x7; only EAX is recorded)
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_ecx1_offset())));
    __ movl(Address(rsi, 0), rax);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    // rax now holds the maximum supported extended leaf; dispatch to the
    // highest-leaf reader we can use (readers chain downward from there).
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // proceed if OS enabled sse | ymm state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    // Note: UseAVX/UseSSE are temporarily overridden below (at stub
    // *generation* time) so the assembler accepts AVX/EVEX encodings;
    // they are restored before the stub is finished.
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7_ebx bit 16 (avx512f)
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    // Save caller-owned xmm registers (not preserved across this call on
    // Windows) before clobbering them with the test pattern.
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7_ebx bit 16 (avx512f)
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      // Dump zmm0/zmm7 (and zmm8/zmm31 on LP64) so the caller can verify the
      // test pattern survived the signal.
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
      // Restore caller-owned registers saved above (reverse order).
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    // Dump ymm0/ymm7 (and ymm8/ymm15 on LP64) for post-signal verification.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
    // Restore caller-owned registers saved above (reverse order).
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS
    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    // Common epilogue: restore flags and callee-saved registers.
    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emit a vzeroupper (unless running on a Xeon Phi, where it must be
  // avoided) and fall through; jumps to L_wrapup on the skip paths.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG' -- skip unless GenuineIntel
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));         // mask family/model from cpuid1 eax
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Emit the detect_virt stub: void detect_virt(uint32_t leaf, uint32_t regs[4]);
  // Runs CPUID for the given leaf and stores eax/ebx/ecx/edx into regs.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Emit the getCPUIDBrandString stub: reads the 48-byte processor brand
  // string via extended CPUID leaves 0x80000002..0x80000004 into the
  // proc_name_* slots of the CpuidInfo struct. Uses the same EFLAGS-based
  // 386/486 detection as generate_get_cpu_info to avoid executing CPUID
  // on chips that lack it.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 798 
 799 void VM_Version::get_processor_features() {
 800 
 801   _cpu = 4; // 486 by default
 802   _model = 0;
 803   _stepping = 0;
 804   _features = 0;
 805   _logical_processors_per_package = 1;
 806   // i486 internal cache is both I&D and has a 16-byte line size
 807   _L1_data_cache_line_size = 16;
 808 
 809   // Get raw processor info
 810 
 811   get_cpu_info_stub(&_cpuid_info);
 812 
 813   assert_is_initialized();
 814   _cpu = extended_cpu_family();
 815   _model = extended_cpu_model();
 816   _stepping = cpu_stepping();
 817 
 818   if (cpu_family() > 4) { // it supports CPUID
 819     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 820     _cpu_features = _features;   // Preserve features
 821     // Logical processors are only available on P4s and above,
 822     // and only if hyperthreading is available.
 823     _logical_processors_per_package = logical_processor_count();
 824     _L1_data_cache_line_size = L1_line_size();
 825   }
 826 
 827   // xchg and xadd instructions
 828   _supports_atomic_getset4 = true;
 829   _supports_atomic_getadd4 = true;
 830   LP64_ONLY(_supports_atomic_getset8 = true);
 831   LP64_ONLY(_supports_atomic_getadd8 = true);
 832 
 833 #ifdef _LP64
 834   // OS should support SSE for x64 and hardware should support at least SSE2.
 835   if (!VM_Version::supports_sse2()) {
 836     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 837   }
 838   // in 64 bit the use of SSE2 is the minimum
 839   if (UseSSE < 2) UseSSE = 2;
 840 #endif
 841 
 842 #ifdef AMD64
 843   // flush_icache_stub have to be generated first.
 844   // That is why Icache line size is hard coded in ICache class,
 845   // see icache_x86.hpp. It is also the reason why we can't use
 846   // clflush instruction in 32-bit VM since it could be running
 847   // on CPU which does not support it.
 848   //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
 851   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 852   // clflush_size is size in quadwords (8 bytes).
 853   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 854 #endif
 855 
 856 #ifdef _LP64
 857   // assigning this field effectively enables Unsafe.writebackMemory()
 858   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 859   // that is only implemented on x86_64 and only if the OS plays ball
 860   if (os::supports_map_sync()) {
 861     // publish data cache line flush size to generic field, otherwise
  // let it default to zero, thereby disabling writeback
 863     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 864   }
 865 #endif
 866 
 867   // Check if processor has Intel Ecore
 868   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 869     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
 870     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 871   }
 872 
 873   if (UseSSE < 4) {
 874     _features &= ~CPU_SSE4_1;
 875     _features &= ~CPU_SSE4_2;
 876   }
 877 
 878   if (UseSSE < 3) {
 879     _features &= ~CPU_SSE3;
 880     _features &= ~CPU_SSSE3;
 881     _features &= ~CPU_SSE4A;
 882   }
 883 
 884   if (UseSSE < 2)
 885     _features &= ~CPU_SSE2;
 886 
 887   if (UseSSE < 1)
 888     _features &= ~CPU_SSE;
 889 
  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
 891   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 892     UseAVX = 0;
 893   }
 894 
 895   // UseSSE is set to the smaller of what hardware supports and what
 896   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 897   // older Pentiums which do not support it.
 898   int use_sse_limit = 0;
 899   if (UseSSE > 0) {
 900     if (UseSSE > 3 && supports_sse4_1()) {
 901       use_sse_limit = 4;
 902     } else if (UseSSE > 2 && supports_sse3()) {
 903       use_sse_limit = 3;
 904     } else if (UseSSE > 1 && supports_sse2()) {
 905       use_sse_limit = 2;
 906     } else if (UseSSE > 0 && supports_sse()) {
 907       use_sse_limit = 1;
 908     } else {
 909       use_sse_limit = 0;
 910     }
 911   }
 912   if (FLAG_IS_DEFAULT(UseSSE)) {
 913     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 914   } else if (UseSSE > use_sse_limit) {
 915     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 916     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 917   }
 918 
 919   // first try initial setting and detect what we can support
 920   int use_avx_limit = 0;
 921   if (UseAVX > 0) {
 922     if (UseSSE < 4) {
 923       // Don't use AVX if SSE is unavailable or has been disabled.
 924       use_avx_limit = 0;
 925     } else if (UseAVX > 2 && supports_evex()) {
 926       use_avx_limit = 3;
 927     } else if (UseAVX > 1 && supports_avx2()) {
 928       use_avx_limit = 2;
 929     } else if (UseAVX > 0 && supports_avx()) {
 930       use_avx_limit = 1;
 931     } else {
 932       use_avx_limit = 0;
 933     }
 934   }
 935   if (FLAG_IS_DEFAULT(UseAVX)) {
 936     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 937     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 938       FLAG_SET_DEFAULT(UseAVX, 2);
 939     } else {
 940       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 941     }
 942   }
 943   if (UseAVX > use_avx_limit) {
 944     if (UseSSE < 4) {
 945       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 946     } else {
 947       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 948     }
 949     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 950   }
 951 
 952   if (UseAVX < 3) {
 953     _features &= ~CPU_AVX512F;
 954     _features &= ~CPU_AVX512DQ;
 955     _features &= ~CPU_AVX512CD;
 956     _features &= ~CPU_AVX512BW;
 957     _features &= ~CPU_AVX512VL;
 958     _features &= ~CPU_AVX512_VPOPCNTDQ;
 959     _features &= ~CPU_AVX512_VPCLMULQDQ;
 960     _features &= ~CPU_AVX512_VAES;
 961     _features &= ~CPU_AVX512_VNNI;
 962     _features &= ~CPU_AVX512_VBMI;
 963     _features &= ~CPU_AVX512_VBMI2;
 964     _features &= ~CPU_AVX512_BITALG;
 965     _features &= ~CPU_AVX512_IFMA;
 966   }
 967 
 968   if (UseAVX < 2) {
 969     _features &= ~CPU_AVX2;
 970     _features &= ~CPU_AVX_IFMA;
 971   }
 972 
 973   if (UseAVX < 1) {
 974     _features &= ~CPU_AVX;
 975     _features &= ~CPU_VZEROUPPER;
 976     _features &= ~CPU_F16C;
 977   }
 978 
 979   if (logical_processors_per_package() == 1) {
 980     // HT processor could be installed on a system which doesn't support HT.
 981     _features &= ~CPU_HT;
 982   }
 983 
 984   if (is_intel()) { // Intel cpus specific settings
 985     if (is_knights_family()) {
 986       _features &= ~CPU_VZEROUPPER;
 987       _features &= ~CPU_AVX512BW;
 988       _features &= ~CPU_AVX512VL;
 989       _features &= ~CPU_AVX512DQ;
 990       _features &= ~CPU_AVX512_VNNI;
 991       _features &= ~CPU_AVX512_VAES;
 992       _features &= ~CPU_AVX512_VPOPCNTDQ;
 993       _features &= ~CPU_AVX512_VPCLMULQDQ;
 994       _features &= ~CPU_AVX512_VBMI;
 995       _features &= ~CPU_AVX512_VBMI2;
 996       _features &= ~CPU_CLWB;
 997       _features &= ~CPU_FLUSHOPT;
 998       _features &= ~CPU_GFNI;
 999       _features &= ~CPU_AVX512_BITALG;
1000       _features &= ~CPU_AVX512_IFMA;
1001       _features &= ~CPU_AVX_IFMA;
1002     }
1003   }
1004 
1005   // APX support not enabled yet
1006   if (UseAPX) {
1007     if (!FLAG_IS_DEFAULT(UseAPX)) {
1008         warning("APX is not supported on this CPU.");
1009     }
1010     FLAG_SET_DEFAULT(UseAPX, false);
1011   }
1012 
1013   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1014     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1015   } else {
1016     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1017   }
1018 
1019   char buf[1024];
1020   int res = jio_snprintf(
1021               buf, sizeof(buf),
1022               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1023               cores_per_cpu(), threads_per_core(),
1024               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1025   assert(res > 0, "not enough temporary space allocated");
1026   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1027 
1028   _features_string = os::strdup(buf);
1029 
1030   // Use AES instructions if available.
1031   if (supports_aes()) {
1032     if (FLAG_IS_DEFAULT(UseAES)) {
1033       FLAG_SET_DEFAULT(UseAES, true);
1034     }
1035     if (!UseAES) {
1036       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1037         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1038       }
1039       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1040     } else {
1041       if (UseSSE > 2) {
1042         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1043           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1044         }
1045       } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
1048         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1049           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1050         }
1051         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1052       }
1053 
1054       // --AES-CTR begins--
1055       if (!UseAESIntrinsics) {
1056         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1057           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1058           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1059         }
1060       } else {
1061         if (supports_sse4_1()) {
1062           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1063             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1064           }
1065         } else {
           // The AES-CTR intrinsic stubs require AES instruction support (of course)
           // but also require SSE4.1 mode or higher for the instructions they use.
1068           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1069              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1070            }
1071            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1072         }
1073       }
1074       // --AES-CTR ends--
1075     }
1076   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1077     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1078       warning("AES instructions are not available on this CPU");
1079       FLAG_SET_DEFAULT(UseAES, false);
1080     }
1081     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1082       warning("AES intrinsics are not available on this CPU");
1083       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1084     }
1085     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1086       warning("AES-CTR intrinsics are not available on this CPU");
1087       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1088     }
1089   }
1090 
1091   // Use CLMUL instructions if available.
1092   if (supports_clmul()) {
1093     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1094       UseCLMUL = true;
1095     }
1096   } else if (UseCLMUL) {
1097     if (!FLAG_IS_DEFAULT(UseCLMUL))
1098       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1099     FLAG_SET_DEFAULT(UseCLMUL, false);
1100   }
1101 
1102   if (UseCLMUL && (UseSSE > 2)) {
1103     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1104       UseCRC32Intrinsics = true;
1105     }
1106   } else if (UseCRC32Intrinsics) {
1107     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1108       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1109     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1110   }
1111 
1112 #ifdef _LP64
1113   if (supports_avx2()) {
1114     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1115       UseAdler32Intrinsics = true;
1116     }
1117   } else if (UseAdler32Intrinsics) {
1118     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1119       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1120     }
1121     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1122   }
1123 #else
1124   if (UseAdler32Intrinsics) {
1125     warning("Adler32Intrinsics not available on this CPU.");
1126     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1127   }
1128 #endif
1129 
1130   if (supports_sse4_2() && supports_clmul()) {
1131     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1132       UseCRC32CIntrinsics = true;
1133     }
1134   } else if (UseCRC32CIntrinsics) {
1135     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1136       warning("CRC32C intrinsics are not available on this CPU");
1137     }
1138     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1139   }
1140 
1141   // GHASH/GCM intrinsics
1142   if (UseCLMUL && (UseSSE > 2)) {
1143     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1144       UseGHASHIntrinsics = true;
1145     }
1146   } else if (UseGHASHIntrinsics) {
1147     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1148       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1149     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1150   }
1151 
1152 #ifdef _LP64
1153   // ChaCha20 Intrinsics
1154   // As long as the system supports AVX as a baseline we can do a
1155   // SIMD-enabled block function.  StubGenerator makes the determination
1156   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1157   // version.
1158   if (UseAVX >= 1) {
1159       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1160           UseChaCha20Intrinsics = true;
1161       }
1162   } else if (UseChaCha20Intrinsics) {
1163       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1164           warning("ChaCha20 intrinsic requires AVX instructions");
1165       }
1166       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1167   }
1168 #else
1169   // No support currently for ChaCha20 intrinsics on 32-bit platforms
1170   if (UseChaCha20Intrinsics) {
1171       warning("ChaCha20 intrinsics are not available on this CPU.");
1172       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1173   }
1174 #endif // _LP64
1175 
1176   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1177   if (UseAVX >= 2) {
1178     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1179       UseBASE64Intrinsics = true;
1180     }
1181   } else if (UseBASE64Intrinsics) {
1182      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1183       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1184     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1185   }
1186 
1187   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1188     if (FLAG_IS_DEFAULT(UseFMA)) {
1189       UseFMA = true;
1190     }
1191   } else if (UseFMA) {
1192     warning("FMA instructions are not available on this CPU");
1193     FLAG_SET_DEFAULT(UseFMA, false);
1194   }
1195 
1196   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1197     UseMD5Intrinsics = true;
1198   }
1199 
1200   if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
1201     if (FLAG_IS_DEFAULT(UseSHA)) {
1202       UseSHA = true;
1203     }
1204   } else if (UseSHA) {
1205     warning("SHA instructions are not available on this CPU");
1206     FLAG_SET_DEFAULT(UseSHA, false);
1207   }
1208 
1209   if (supports_sha() && supports_sse4_1() && UseSHA) {
1210     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1211       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1212     }
1213   } else if (UseSHA1Intrinsics) {
1214     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1215     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1216   }
1217 
1218   if (supports_sse4_1() && UseSHA) {
1219     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1220       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1221     }
1222   } else if (UseSHA256Intrinsics) {
1223     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1224     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1225   }
1226 
1227 #ifdef _LP64
1228   // These are only supported on 64-bit
1229   if (UseSHA && supports_avx2() && supports_bmi2()) {
1230     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1231       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1232     }
1233   } else
1234 #endif
1235   if (UseSHA512Intrinsics) {
1236     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1237     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1238   }
1239 
1240   if (UseSHA3Intrinsics) {
1241     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1242     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1243   }
1244 
1245   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1246     FLAG_SET_DEFAULT(UseSHA, false);
1247   }
1248 
1249   if (!supports_rtm() && UseRTMLocking) {
1250     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1251   }
1252 
1253 #if INCLUDE_RTM_OPT
1254   if (UseRTMLocking) {
1255     if (!CompilerConfig::is_c2_enabled()) {
1256       // Only C2 does RTM locking optimization.
1257       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1258     }
1259     if (is_intel_family_core()) {
1260       if ((_model == CPU_MODEL_HASWELL_E3) ||
1261           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1262           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1263         // currently a collision between SKL and HSW_E3
1264         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1265           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1266                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1267         } else {
1268           warning("UseRTMLocking is only available as experimental option on this platform.");
1269         }
1270       }
1271     }
1272     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1273       // RTM locking should be used only for applications with
1274       // high lock contention. For now we do not use it by default.
1275       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1276     }
1277   } else { // !UseRTMLocking
1278     if (UseRTMForStackLocks) {
1279       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1280         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1281       }
1282       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1283     }
1284     if (UseRTMDeopt) {
1285       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1286     }
1287     if (PrintPreciseRTMLockingStatistics) {
1288       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1289     }
1290   }
1291 #else
1292   if (UseRTMLocking) {
1293     // Only C2 does RTM locking optimization.
1294     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1295   }
1296 #endif
1297 
1298 #ifdef COMPILER2
1299   if (UseFPUForSpilling) {
1300     if (UseSSE < 2) {
1301       // Only supported with SSE2+
1302       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1303     }
1304   }
1305 #endif
1306 
1307 #if COMPILER2_OR_JVMCI
1308   int max_vector_size = 0;
1309   if (UseSSE < 2) {
1310     // Vectors (in XMM) are only supported with SSE2+
1311     // SSE is always 2 on x64.
1312     max_vector_size = 0;
1313   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1314     // 16 byte vectors (in XMM) are supported with SSE2+
1315     max_vector_size = 16;
1316   } else if (UseAVX == 1 || UseAVX == 2) {
1317     // 32 bytes vectors (in YMM) are only supported with AVX+
1318     max_vector_size = 32;
1319   } else if (UseAVX > 2) {
1320     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1321     max_vector_size = 64;
1322   }
1323 
1324 #ifdef _LP64
1325   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1326 #else
1327   int min_vector_size = 0;
1328 #endif
1329 
1330   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1331     if (MaxVectorSize < min_vector_size) {
1332       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1333       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1334     }
1335     if (MaxVectorSize > max_vector_size) {
1336       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1337       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1338     }
1339     if (!is_power_of_2(MaxVectorSize)) {
1340       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1341       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1342     }
1343   } else {
1344     // If default, use highest supported configuration
1345     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1346   }
1347 
1348 #if defined(COMPILER2) && defined(ASSERT)
1349   if (MaxVectorSize > 0) {
1350     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1351       tty->print_cr("State of YMM registers after signal handle:");
1352       int nreg = 2 LP64_ONLY(+2);
1353       const char* ymm_name[4] = {"0", "7", "8", "15"};
1354       for (int i = 0; i < nreg; i++) {
1355         tty->print("YMM%s:", ymm_name[i]);
1356         for (int j = 7; j >=0; j--) {
1357           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1358         }
1359         tty->cr();
1360       }
1361     }
1362   }
1363 #endif // COMPILER2 && ASSERT
1364 
1365 #ifdef _LP64
1366   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1367     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1368       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1369     }
1370   } else
1371 #endif
1372   if (UsePoly1305Intrinsics) {
1373     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1374     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1375   }
1376 
1377 #ifdef _LP64
1378   if (supports_avx512ifma() && supports_avx512vlbw()) {
1379     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1380       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1381     }
1382   } else
1383 #endif
1384   if (UseIntPolyIntrinsics) {
1385     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1386     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1387   }
1388 
1389 #ifdef _LP64
1390   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1391     UseMultiplyToLenIntrinsic = true;
1392   }
1393   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1394     UseSquareToLenIntrinsic = true;
1395   }
1396   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1397     UseMulAddIntrinsic = true;
1398   }
1399   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1400     UseMontgomeryMultiplyIntrinsic = true;
1401   }
1402   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1403     UseMontgomerySquareIntrinsic = true;
1404   }
1405 #else
1406   if (UseMultiplyToLenIntrinsic) {
1407     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1408       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1409     }
1410     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1411   }
1412   if (UseMontgomeryMultiplyIntrinsic) {
1413     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1414       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1415     }
1416     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1417   }
1418   if (UseMontgomerySquareIntrinsic) {
1419     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1420       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1421     }
1422     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1423   }
1424   if (UseSquareToLenIntrinsic) {
1425     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1426       warning("squareToLen intrinsic is not available in 32-bit VM");
1427     }
1428     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1429   }
1430   if (UseMulAddIntrinsic) {
1431     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1432       warning("mulAdd intrinsic is not available in 32-bit VM");
1433     }
1434     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1435   }
1436 #endif // _LP64
1437 #endif // COMPILER2_OR_JVMCI
1438 
1439   // On new cpus instructions which update whole XMM register should be used
1440   // to prevent partial register stall due to dependencies on high half.
1441   //
1442   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1443   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1444   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1445   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1446 
1447 
1448   if (is_zx()) { // ZX cpus specific settings
1449     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1450       UseStoreImmI16 = false; // don't use it on ZX cpus
1451     }
1452     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1453       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1454         // Use it on all ZX cpus
1455         UseAddressNop = true;
1456       }
1457     }
1458     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1459       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1460     }
1461     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1462       if (supports_sse3()) {
1463         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1464       } else {
1465         UseXmmRegToRegMoveAll = false;
1466       }
1467     }
1468     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1469 #ifdef COMPILER2
1470       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1471         // For new ZX cpus do the next optimization:
1472         // don't align the beginning of a loop if there are enough instructions
1473         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1474         // in current fetch line (OptoLoopAlignment) or the padding
1475         // is big (> MaxLoopPad).
1476         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1477         // generated NOP instructions. 11 is the largest size of one
1478         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1479         MaxLoopPad = 11;
1480       }
1481 #endif // COMPILER2
1482       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1483         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1484       }
1485       if (supports_sse4_2()) { // new ZX cpus
1486         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1487           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1488         }
1489       }
1490       if (supports_sse4_2()) {
1491         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1492           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1493         }
1494       } else {
1495         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1496           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1497         }
1498         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1499       }
1500     }
1501 
1502     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1503       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1504     }
1505   }
1506 
1507   if (is_amd_family()) { // AMD cpus specific settings
1508     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1509       // Use it on new AMD cpus starting from Opteron.
1510       UseAddressNop = true;
1511     }
1512     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1513       // Use it on new AMD cpus starting from Opteron.
1514       UseNewLongLShift = true;
1515     }
1516     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1517       if (supports_sse4a()) {
1518         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1519       } else {
1520         UseXmmLoadAndClearUpper = false;
1521       }
1522     }
1523     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1524       if (supports_sse4a()) {
1525         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1526       } else {
1527         UseXmmRegToRegMoveAll = false;
1528       }
1529     }
1530     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1531       if (supports_sse4a()) {
1532         UseXmmI2F = true;
1533       } else {
1534         UseXmmI2F = false;
1535       }
1536     }
1537     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1538       if (supports_sse4a()) {
1539         UseXmmI2D = true;
1540       } else {
1541         UseXmmI2D = false;
1542       }
1543     }
1544     if (supports_sse4_2()) {
1545       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1546         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1547       }
1548     } else {
1549       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1550         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1551       }
1552       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1553     }
1554 
1555     // some defaults for AMD family 15h
1556     if (cpu_family() == 0x15) {
1557       // On family 15h processors default is no sw prefetch
1558       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1559         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1560       }
1561       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1562       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1563         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1564       }
1565       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1566       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1567         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1568       }
1569       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1570         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1571       }
1572     }
1573 
1574 #ifdef COMPILER2
1575     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1576       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1577       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1578     }
1579 #endif // COMPILER2
1580 
1581     // Some defaults for AMD family >= 17h && Hygon family 18h
1582     if (cpu_family() >= 0x17) {
1583       // On family >=17h processors use XMM and UnalignedLoadStores
1584       // for Array Copy
1585       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1586         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1587       }
1588       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1589         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1590       }
1591 #ifdef COMPILER2
1592       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1593         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1594       }
1595 #endif
1596     }
1597   }
1598 
1599   if (is_intel()) { // Intel cpus specific settings
1600     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1601       UseStoreImmI16 = false; // don't use it on Intel cpus
1602     }
1603     if (cpu_family() == 6 || cpu_family() == 15) {
1604       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1605         // Use it on all Intel cpus starting from PentiumPro
1606         UseAddressNop = true;
1607       }
1608     }
1609     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1610       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1611     }
1612     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1613       if (supports_sse3()) {
1614         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1615       } else {
1616         UseXmmRegToRegMoveAll = false;
1617       }
1618     }
1619     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1620 #ifdef COMPILER2
1621       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1622         // For new Intel cpus do the next optimization:
1623         // don't align the beginning of a loop if there are enough instructions
1624         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1625         // in current fetch line (OptoLoopAlignment) or the padding
1626         // is big (> MaxLoopPad).
1627         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1628         // generated NOP instructions. 11 is the largest size of one
1629         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1630         MaxLoopPad = 11;
1631       }
1632 #endif // COMPILER2
1633 
1634       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1635         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1636       }
1637       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1638         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1639           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1640         }
1641       }
1642       if (supports_sse4_2()) {
1643         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1644           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1645         }
1646       } else {
1647         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1648           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1649         }
1650         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1651       }
1652     }
1653     if (is_atom_family() || is_knights_family()) {
1654 #ifdef COMPILER2
1655       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1656         OptoScheduling = true;
1657       }
1658 #endif
1659       if (supports_sse4_2()) { // Silvermont
1660         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1661           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1662         }
1663       }
1664       if (FLAG_IS_DEFAULT(UseIncDec)) {
1665         FLAG_SET_DEFAULT(UseIncDec, false);
1666       }
1667     }
1668     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1669       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1670     }
1671 #ifdef COMPILER2
1672     if (UseAVX > 2) {
1673       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1674           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1675            ArrayOperationPartialInlineSize != 0 &&
1676            ArrayOperationPartialInlineSize != 16 &&
1677            ArrayOperationPartialInlineSize != 32 &&
1678            ArrayOperationPartialInlineSize != 64)) {
1679         int inline_size = 0;
1680         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1681           inline_size = 64;
1682         } else if (MaxVectorSize >= 32) {
1683           inline_size = 32;
1684         } else if (MaxVectorSize >= 16) {
1685           inline_size = 16;
1686         }
1687         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1688           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1689         }
1690         ArrayOperationPartialInlineSize = inline_size;
1691       }
1692 
1693       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1694         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1695         if (ArrayOperationPartialInlineSize) {
1696           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1697         } else {
1698           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1699         }
1700       }
1701     }
1702 #endif
1703   }
1704 
1705 #ifdef COMPILER2
1706   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1707     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1708       OptimizeFill = false;
1709     }
1710   }
1711 #endif
1712 
1713 #ifdef _LP64
1714   if (UseSSE42Intrinsics) {
1715     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1716       UseVectorizedMismatchIntrinsic = true;
1717     }
1718   } else if (UseVectorizedMismatchIntrinsic) {
1719     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1720       warning("vectorizedMismatch intrinsics are not available on this CPU");
1721     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1722   }
1723   if (UseAVX >= 2) {
1724     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1725   } else if (UseVectorizedHashCodeIntrinsic) {
1726     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1727       warning("vectorizedHashCode intrinsics are not available on this CPU");
1728     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1729   }
1730 #else
1731   if (UseVectorizedMismatchIntrinsic) {
1732     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1733       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1734     }
1735     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1736   }
1737   if (UseVectorizedHashCodeIntrinsic) {
1738     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1739       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1740     }
1741     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1742   }
1743 #endif // _LP64
1744 
1745   // Use count leading zeros count instruction if available.
1746   if (supports_lzcnt()) {
1747     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1748       UseCountLeadingZerosInstruction = true;
1749     }
1750    } else if (UseCountLeadingZerosInstruction) {
1751     warning("lzcnt instruction is not available on this CPU");
1752     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1753   }
1754 
1755   // Use count trailing zeros instruction if available
1756   if (supports_bmi1()) {
1757     // tzcnt does not require VEX prefix
1758     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1759       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1760         // Don't use tzcnt if BMI1 is switched off on command line.
1761         UseCountTrailingZerosInstruction = false;
1762       } else {
1763         UseCountTrailingZerosInstruction = true;
1764       }
1765     }
1766   } else if (UseCountTrailingZerosInstruction) {
1767     warning("tzcnt instruction is not available on this CPU");
1768     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1769   }
1770 
1771   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1772   // VEX prefix is generated only when AVX > 0.
1773   if (supports_bmi1() && supports_avx()) {
1774     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1775       UseBMI1Instructions = true;
1776     }
1777   } else if (UseBMI1Instructions) {
1778     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1779     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1780   }
1781 
1782   if (supports_bmi2() && supports_avx()) {
1783     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1784       UseBMI2Instructions = true;
1785     }
1786   } else if (UseBMI2Instructions) {
1787     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1788     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1789   }
1790 
1791   // Use population count instruction if available.
1792   if (supports_popcnt()) {
1793     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1794       UsePopCountInstruction = true;
1795     }
1796   } else if (UsePopCountInstruction) {
1797     warning("POPCNT instruction is not available on this CPU");
1798     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1799   }
1800 
1801   // Use fast-string operations if available.
1802   if (supports_erms()) {
1803     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1804       UseFastStosb = true;
1805     }
1806   } else if (UseFastStosb) {
1807     warning("fast-string operations are not available on this CPU");
1808     FLAG_SET_DEFAULT(UseFastStosb, false);
1809   }
1810 
1811   // For AMD Processors use XMM/YMM MOVDQU instructions
1812   // for Object Initialization as default
1813   if (is_amd() && cpu_family() >= 0x19) {
1814     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1815       UseFastStosb = false;
1816     }
1817   }
1818 
1819 #ifdef COMPILER2
1820   if (is_intel() && MaxVectorSize > 16) {
1821     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1822       UseFastStosb = false;
1823     }
1824   }
1825 #endif
1826 
1827   // Use XMM/YMM MOVDQU instruction for Object Initialization
1828   if (UseSSE >= 2 && UseUnalignedLoadStores) {
1829     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1830       UseXMMForObjInit = true;
1831     }
1832   } else if (UseXMMForObjInit) {
1833     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1834     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1835   }
1836 
1837 #ifdef COMPILER2
1838   if (FLAG_IS_DEFAULT(AlignVector)) {
1839     // Modern processors allow misaligned memory operations for vectors.
1840     AlignVector = !UseUnalignedLoadStores;
1841   }
1842 #endif // COMPILER2
1843 
1844   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1845     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1846       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1847     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1848       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1849     }
1850   }
1851 
1852   // Allocation prefetch settings
1853   int cache_line_size = checked_cast<int>(prefetch_data_size());
1854   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1855       (cache_line_size > AllocatePrefetchStepSize)) {
1856     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1857   }
1858 
1859   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1860     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1861     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1862       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1863     }
1864     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1865   }
1866 
1867   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1868     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1869     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1870   }
1871 
1872   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1873     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1874         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1875       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1876     }
1877 #ifdef COMPILER2
1878     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1879       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1880     }
1881 #endif
1882   }
1883 
1884   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1885 #ifdef COMPILER2
1886     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1887       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1888     }
1889 #endif
1890   }
1891 
1892 #ifdef _LP64
1893   // Prefetch settings
1894 
1895   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1896   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1897   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1898   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1899 
1900   // gc copy/scan is disabled if prefetchw isn't supported, because
1901   // Prefetch::write emits an inlined prefetchw on Linux.
1902   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1903   // The used prefetcht0 instruction works for both amd64 and em64t.
1904 
1905   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1906     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1907   }
1908   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1909     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1910   }
1911 #endif
1912 
1913   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1914      (cache_line_size > ContendedPaddingWidth))
1915      ContendedPaddingWidth = cache_line_size;
1916 
1917   // This machine allows unaligned memory accesses
1918   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1919     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1920   }
1921 
1922 #ifndef PRODUCT
1923   if (log_is_enabled(Info, os, cpu)) {
1924     LogStream ls(Log(os, cpu)::info());
1925     outputStream* log = &ls;
1926     log->print_cr("Logical CPUs per core: %u",
1927                   logical_processors_per_package());
1928     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1929     log->print("UseSSE=%d", UseSSE);
1930     if (UseAVX > 0) {
1931       log->print("  UseAVX=%d", UseAVX);
1932     }
1933     if (UseAES) {
1934       log->print("  UseAES=1");
1935     }
1936 #ifdef COMPILER2
1937     if (MaxVectorSize > 0) {
1938       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1939     }
1940 #endif
1941     log->cr();
1942     log->print("Allocation");
1943     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1944       log->print_cr(": no prefetching");
1945     } else {
1946       log->print(" prefetching: ");
1947       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1948         log->print("PREFETCHW");
1949       } else if (UseSSE >= 1) {
1950         if (AllocatePrefetchInstr == 0) {
1951           log->print("PREFETCHNTA");
1952         } else if (AllocatePrefetchInstr == 1) {
1953           log->print("PREFETCHT0");
1954         } else if (AllocatePrefetchInstr == 2) {
1955           log->print("PREFETCHT2");
1956         } else if (AllocatePrefetchInstr == 3) {
1957           log->print("PREFETCHW");
1958         }
1959       }
1960       if (AllocatePrefetchLines > 1) {
1961         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1962       } else {
1963         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1964       }
1965     }
1966 
1967     if (PrefetchCopyIntervalInBytes > 0) {
1968       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1969     }
1970     if (PrefetchScanIntervalInBytes > 0) {
1971       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1972     }
1973     if (ContendedPaddingWidth > 0) {
1974       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1975     }
1976   }
1977 #endif // !PRODUCT
1978   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1979       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1980   }
1981   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1982       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1983   }
1984 }
1985 
1986 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1987   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1988   if (vrt == XenHVM) {
1989     st->print_cr("Xen hardware-assisted virtualization detected");
1990   } else if (vrt == KVM) {
1991     st->print_cr("KVM virtualization detected");
1992   } else if (vrt == VMWare) {
1993     st->print_cr("VMWare virtualization detected");
1994     VirtualizationSupport::print_virtualization_info(st);
1995   } else if (vrt == HyperV) {
1996     st->print_cr("Hyper-V virtualization detected");
1997   } else if (vrt == HyperVRole) {
1998     st->print_cr("Hyper-V role detected");
1999   }
2000 }
2001 
// Returns true when the running processor is one of the Intel parts affected
// by the Jump Conditional Code (JCC) erratum. Detection compares the cached
// CPUID model (_model) and stepping (_stepping) against the affected
// (model, stepping) pairs published by Intel.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2069 
2070 // On Xen, the cpuid instruction returns
2071 //  eax / registers[0]: Version of Xen
2072 //  ebx / registers[1]: chars 'XenV'
2073 //  ecx / registers[2]: chars 'MMXe'
2074 //  edx / registers[3]: chars 'nVMM'
2075 //
2076 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2077 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2078 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2079 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2080 //
// More information:
2082 // https://kb.vmware.com/s/article/1009458
2083 //
// Probes the CPUID hypervisor leaves and records the detected hypervisor
// (if any) in Abstract_VM_Version::_detected_virtualization.
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0};  // 12 signature characters + NUL terminator

  // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
  // from 0x40000000 until 0x40010000.
  //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    // The 12-character vendor signature is held in EBX:ECX:EDX, i.e. in
    // registers[1..3], which are contiguous in the array.
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
    // NOTE: the scan continues through all leaves; a match at a later leaf
    // overwrites a detection made at an earlier one.
  }
}
2120 
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  // Bail out early if the user overrode either of the relevant flags.
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  return UseAVX > 2 && is_intel_cascade_lake();
}
#endif
2130 
2131 bool VM_Version::is_intel_cascade_lake() {
2132   return is_intel_skylake() && _stepping >= 5;
2133 }
2134 
2135 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2136 // for implementing the array copy and clear operations.
2137 // The Intel platforms that supports the serialize instruction
2138 // has improved implementation of 64-byte load/stores and so the default
2139 // threshold is set to 0 for these platforms.
2140 int VM_Version::avx3_threshold() {
2141   return (is_intel_family_core() &&
2142           supports_serialize() &&
2143           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2144 }
2145 
// Set to true at the end of VM_Version::initialize() below.
static bool _vm_version_initialized = false;
2147 
2148 void VM_Version::initialize() {
2149   ResourceMark rm;
2150   // Making this stub must be FIRST use of assembler
2151   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2152   if (stub_blob == nullptr) {
2153     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2154   }
2155   CodeBuffer c(stub_blob);
2156   VM_Version_StubGenerator g(&c);
2157 
2158   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2159                                      g.generate_get_cpu_info());
2160   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2161                                      g.generate_detect_virt());
2162 
2163   get_processor_features();
2164 
2165   LP64_ONLY(Assembler::precompute_instructions();)
2166 
2167   if (VM_Version::supports_hv()) { // Supports hypervisor
2168     check_virtualizations();
2169   }
2170   _vm_version_initialized = true;
2171 }
2172 
// Symbolic names for x86 CPU family ids as reported by CPUID.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
2182 
// Feature bits from the extended-function CPUID EDX word.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2187 
// Single-bit masks for the standard feature EDX word; bit positions
// correspond to the names listed in _feature_edx_id below.
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2218 
// Blob and stub used to fetch the processor brand string via CPUID
// (generated lazily in VM_Version::initialize_tsc()).
static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  // The generated stub takes a single buffer-pointer argument.
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,   // entries in _family_id_intel below
  ExtendedFamilyIdLength_AMD   = 24    // entries in _family_id_amd below
};

const size_t VENDOR_LENGTH = 13;                   // 12 vendor characters + NUL
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); // 3 CPUID leaves * 4 registers * 4 chars + NUL
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2241 
// Family id -> family name for Intel parts, indexed by CPUID family id
// (see FamilyFlag above); empty strings mark unused family ids.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2260 
// Family id -> family name for AMD parts, indexed by CPUID family id;
// empty strings mark unused family ids.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"                 // family 0x17
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Indexed by extended model id; the trailing nullptr is the table sentinel
// used by cpu_model_description() to bound its scan.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2363 
/* Brand ID is for backward compatibility;
 * newer CPUs use the extended brand string instead.
 * Indexed by brand id; the trailing nullptr terminates the table. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2378 
2379 
2380 const char* const _feature_edx_id[] = {
2381   "On-Chip FPU",
2382   "Virtual Mode Extensions",
2383   "Debugging Extensions",
2384   "Page Size Extensions",
2385   "Time Stamp Counter",
2386   "Model Specific Registers",
2387   "Physical Address Extension",
2388   "Machine Check Exceptions",
2389   "CMPXCHG8B Instruction",
2390   "On-Chip APIC",
2391   "",
2392   "Fast System Call",
2393   "Memory Type Range Registers",
2394   "Page Global Enable",
2395   "Machine Check Architecture",
2396   "Conditional Mov Instruction",
2397   "Page Attribute Table",
2398   "36-bit Page Size Extension",
2399   "Processor Serial Number",
2400   "CLFLUSH Instruction",
2401   "",
2402   "Debug Trace Store feature",
2403   "ACPI registers in MSR space",
2404   "Intel Architecture MMX Technology",
2405   "Fast Float Point Save and Restore",
2406   "Streaming SIMD extensions",
2407   "Streaming SIMD extensions 2",
2408   "Self-Snoop",
2409   "Hyper Threading",
2410   "Thermal Monitor",
2411   "",
2412   "Pending Break Enable"
2413 };
2414 
// Names for the extended feature EDX bits, indexed by bit position
// (cf. _featureExtendedEdxFlag above); empty strings mark unreported bits.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",           // bit 11
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",      // bit 20
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",                   // bit 27
  "",
  "Intel 64 Architecture",    // bit 29
  "",
  ""
};
2449 
// Names for the standard feature ECX bits, indexed by bit position;
// empty strings mark unreported bits.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2484 
// Human-readable names for the extended-function CPUID ECX feature bits,
// indexed by bit position (0-31).  Empty strings mark bits this table does
// not report; cpu_write_support_string() skips entries with empty names.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",            // bit  0
  "Core multi-processor legacy mode",         // bit  1
  "",                                         // bit  2
  "",                                         // bit  3
  "",                                         // bit  4
  "Advanced Bit Manipulations: LZCNT",        // bit  5
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",  // bit  6
  "Misaligned SSE mode",                      // bit  7
  "",                                         // bit  8
  "",                                         // bit  9
  "",                                         // bit 10
  "",                                         // bit 11
  "",                                         // bit 12
  "",                                         // bit 13
  "",                                         // bit 14
  "",                                         // bit 15
  "",                                         // bit 16
  "",                                         // bit 17
  "",                                         // bit 18
  "",                                         // bit 19
  "",                                         // bit 20
  "",                                         // bit 21
  "",                                         // bit 22
  "",                                         // bit 23
  "",                                         // bit 24
  "",                                         // bit 25
  "",                                         // bit 26
  "",                                         // bit 27
  "",                                         // bit 28
  "",                                         // bit 29
  "",                                         // bit 30
  ""                                          // bit 31
};
2519 
// One-time setup for brand-string retrieval: allocates a BufferBlob and
// generates the assembly stub that fills the brand-string fields of
// _cpuid_info.  Aborts VM initialization if the blob cannot be allocated.
void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  // The generated stub must outlive this call, so it gets its own blob.
  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                   g.generate_getCPUIDBrandString());
}
2532 
2533 const char* VM_Version::cpu_model_description(void) {
2534   uint32_t cpu_family = extended_cpu_family();
2535   uint32_t cpu_model = extended_cpu_model();
2536   const char* model = nullptr;
2537 
2538   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2539     for (uint32_t i = 0; i <= cpu_model; i++) {
2540       model = _model_id_pentium_pro[i];
2541       if (model == nullptr) {
2542         break;
2543       }
2544     }
2545   }
2546   return model;
2547 }
2548 
2549 const char* VM_Version::cpu_brand_string(void) {
2550   if (_cpu_brand_string == nullptr) {
2551     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2552     if (nullptr == _cpu_brand_string) {
2553       return nullptr;
2554     }
2555     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2556     if (ret_val != OS_OK) {
2557       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2558       _cpu_brand_string = nullptr;
2559     }
2560   }
2561   return _cpu_brand_string;
2562 }
2563 
// Returns a brand name from the legacy brand-id table using the low byte
// of CPUID.1:EBX, or nullptr when no brand id is reported.
// NOTE(review): the loop assigns every entry from index 0 up to brand_num
// and stops early on a nullptr entry, so the result is _brand_id[brand_num]
// or nullptr if the table ends sooner — presumably _brand_id is
// nullptr-terminated and covers all brand ids; confirm the table bounds.
const char* VM_Version::cpu_brand(void) {
  const char*  brand  = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    // Low byte of EBX is the legacy brand index.
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}
2576 
2577 bool VM_Version::cpu_is_em64t(void) {
2578   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2579 }
2580 
2581 bool VM_Version::is_netburst(void) {
2582   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2583 }
2584 
2585 bool VM_Version::supports_tscinv_ext(void) {
2586   if (!supports_tscinv_bit()) {
2587     return false;
2588   }
2589 
2590   if (is_intel()) {
2591     return true;
2592   }
2593 
2594   if (is_amd()) {
2595     return !is_amd_Barcelona();
2596   }
2597 
2598   if (is_hygon()) {
2599     return true;
2600   }
2601 
2602   return false;
2603 }
2604 
2605 void VM_Version::resolve_cpu_information_details(void) {
2606 
2607   // in future we want to base this information on proper cpu
2608   // and cache topology enumeration such as:
2609   // Intel 64 Architecture Processor Topology Enumeration
2610   // which supports system cpu and cache topology enumeration
2611   // either using 2xAPICIDs or initial APICIDs
2612 
2613   // currently only rough cpu information estimates
2614   // which will not necessarily reflect the exact configuration of the system
2615 
2616   // this is the number of logical hardware threads
2617   // visible to the operating system
2618   _no_of_threads = os::processor_count();
2619 
2620   // find out number of threads per cpu package
2621   int threads_per_package = threads_per_core() * cores_per_cpu();
2622 
2623   // use amount of threads visible to the process in order to guess number of sockets
2624   _no_of_sockets = _no_of_threads / threads_per_package;
2625 
2626   // process might only see a subset of the total number of threads
2627   // from a single processor package. Virtualization/resource management for example.
2628   // If so then just write a hard 1 as num of pkgs.
2629   if (0 == _no_of_sockets) {
2630     _no_of_sockets = 1;
2631   }
2632 
2633   // estimate the number of cores
2634   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2635 }
2636 
2637 
2638 const char* VM_Version::cpu_family_description(void) {
2639   int cpu_family_id = extended_cpu_family();
2640   if (is_amd()) {
2641     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2642       return _family_id_amd[cpu_family_id];
2643     }
2644   }
2645   if (is_intel()) {
2646     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2647       return cpu_model_description();
2648     }
2649     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2650       return _family_id_intel[cpu_family_id];
2651     }
2652   }
2653   if (is_hygon()) {
2654     return "Dhyana";
2655   }
2656   return "Unknown x86";
2657 }
2658 
2659 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2660   assert(buf != nullptr, "buffer is null!");
2661   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2662 
2663   const char* cpu_type = nullptr;
2664   const char* x64 = nullptr;
2665 
2666   if (is_intel()) {
2667     cpu_type = "Intel";
2668     x64 = cpu_is_em64t() ? " Intel64" : "";
2669   } else if (is_amd()) {
2670     cpu_type = "AMD";
2671     x64 = cpu_is_em64t() ? " AMD64" : "";
2672   } else if (is_hygon()) {
2673     cpu_type = "Hygon";
2674     x64 = cpu_is_em64t() ? " AMD64" : "";
2675   } else {
2676     cpu_type = "Unknown x86";
2677     x64 = cpu_is_em64t() ? " x86_64" : "";
2678   }
2679 
2680   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2681     cpu_type,
2682     cpu_family_description(),
2683     supports_ht() ? " (HT)" : "",
2684     supports_sse3() ? " SSE3" : "",
2685     supports_ssse3() ? " SSSE3" : "",
2686     supports_sse4_1() ? " SSE4.1" : "",
2687     supports_sse4_2() ? " SSE4.2" : "",
2688     supports_sse4a() ? " SSE4A" : "",
2689     is_netburst() ? " Netburst" : "",
2690     is_intel_family_core() ? " Core" : "",
2691     x64);
2692 
2693   return OS_OK;
2694 }
2695 
// Runs the generated cpuid stub and copies the processor brand string it
// captured into buf.  buf must hold at least CPU_EBS_MAX_LENGTH bytes.
// Always returns OS_OK.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer
  // NOTE(review): these type-punned 32-bit stores assume buf is suitably
  // aligned for uint32_t — true for the heap buffer used by
  // cpu_brand_string(), but worth confirming for any new caller.
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}
2720 
2721 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2722   guarantee(buf != nullptr, "buffer is null!");
2723   guarantee(buf_len > 0, "buffer len not enough!");
2724 
2725   unsigned int flag = 0;
2726   unsigned int fi = 0;
2727   size_t       written = 0;
2728   const char*  prefix = "";
2729 
2730 #define WRITE_TO_BUF(string)                                                          \
2731   {                                                                                   \
2732     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2733     if (res < 0) {                                                                    \
2734       return buf_len - 1;                                                             \
2735     }                                                                                 \
2736     written += res;                                                                   \
2737     if (prefix[0] == '\0') {                                                          \
2738       prefix = ", ";                                                                  \
2739     }                                                                                 \
2740   }
2741 
2742   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2743     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2744       continue; /* no hyperthreading */
2745     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2746       continue; /* no fast system call */
2747     }
2748     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2749       WRITE_TO_BUF(_feature_edx_id[fi]);
2750     }
2751   }
2752 
2753   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2754     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2755       WRITE_TO_BUF(_feature_ecx_id[fi]);
2756     }
2757   }
2758 
2759   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2760     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2761       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2762     }
2763   }
2764 
2765   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2766     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2767       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2768     }
2769   }
2770 
2771   if (supports_tscinv_bit()) {
2772       WRITE_TO_BUF("Invariant TSC");
2773   }
2774 
2775   return written;
2776 }
2777 
2778 /**
2779  * Write a detailed description of the cpu to a given buffer, including
2780  * feature set.
2781  */
2782 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2783   assert(buf != nullptr, "buffer is null!");
2784   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2785 
2786   static const char* unknown = "<unknown>";
2787   char               vendor_id[VENDOR_LENGTH];
2788   const char*        family = nullptr;
2789   const char*        model = nullptr;
2790   const char*        brand = nullptr;
2791   int                outputLen = 0;
2792 
2793   family = cpu_family_description();
2794   if (family == nullptr) {
2795     family = unknown;
2796   }
2797 
2798   model = cpu_model_description();
2799   if (model == nullptr) {
2800     model = unknown;
2801   }
2802 
2803   brand = cpu_brand_string();
2804 
2805   if (brand == nullptr) {
2806     brand = cpu_brand();
2807     if (brand == nullptr) {
2808       brand = unknown;
2809     }
2810   }
2811 
2812   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2813   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2814   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2815   vendor_id[VENDOR_LENGTH-1] = '\0';
2816 
2817   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2818     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2819     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2820     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2821     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2822     "Supports: ",
2823     brand,
2824     vendor_id,
2825     family,
2826     extended_cpu_family(),
2827     model,
2828     extended_cpu_model(),
2829     cpu_stepping(),
2830     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2831     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2832     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2833     _cpuid_info.std_cpuid1_eax.value,
2834     _cpuid_info.std_cpuid1_ebx.value,
2835     _cpuid_info.std_cpuid1_ecx.value,
2836     _cpuid_info.std_cpuid1_edx.value,
2837     _cpuid_info.ext_cpuid1_eax,
2838     _cpuid_info.ext_cpuid1_ebx,
2839     _cpuid_info.ext_cpuid1_ecx,
2840     _cpuid_info.ext_cpuid1_edx);
2841 
2842   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2843     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2844     return OS_ERR;
2845   }
2846 
2847   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2848 
2849   return OS_OK;
2850 }
2851 
2852 
// Fill in Abstract_VM_Version statics: topology estimates plus the cached
// cpu name and detailed description.  Must run after VM_Version has been
// initialized, and only once.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2864 
2865 /**
2866  *  For information about extracting the frequency from the cpu brand string, please see:
2867  *
2868  *    Intel Processor Identification and the CPUID Instruction
2869  *    Application Note 485
2870  *    May 2012
2871  *
2872  * The return value is the frequency in Hz.
2873  */
2874 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2875   const char* const brand_string = cpu_brand_string();
2876   if (brand_string == nullptr) {
2877     return 0;
2878   }
2879   const int64_t MEGA = 1000000;
2880   int64_t multiplier = 0;
2881   int64_t frequency = 0;
2882   uint8_t idx = 0;
2883   // The brand string buffer is at most 48 bytes.
2884   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2885   for (; idx < 48-2; ++idx) {
2886     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2887     // Search brand string for "yHz" where y is M, G, or T.
2888     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2889       if (brand_string[idx] == 'M') {
2890         multiplier = MEGA;
2891       } else if (brand_string[idx] == 'G') {
2892         multiplier = MEGA * 1000;
2893       } else if (brand_string[idx] == 'T') {
2894         multiplier = MEGA * MEGA;
2895       }
2896       break;
2897     }
2898   }
2899   if (multiplier > 0) {
2900     // Compute frequency (in Hz) from brand string.
2901     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2902       frequency =  (brand_string[idx-4] - '0') * multiplier;
2903       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2904       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2905     } else { // format is "xxxx"
2906       frequency =  (brand_string[idx-4] - '0') * 1000;
2907       frequency += (brand_string[idx-3] - '0') * 100;
2908       frequency += (brand_string[idx-2] - '0') * 10;
2909       frequency += (brand_string[idx-1] - '0');
2910       frequency *= multiplier;
2911     }
2912   }
2913   return frequency;
2914 }
2915 
2916 
2917 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2918   if (_max_qualified_cpu_frequency == 0) {
2919     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2920   }
2921   return _max_qualified_cpu_frequency;
2922 }
2923 
// Translates the raw cpuid state captured in this CpuidInfo into the VM's
// CPU_* feature bitmask.  Vendor-specific quirks (AMD mirrored bits,
// Intel/ZX-only flags) are handled in the sections below.
uint64_t VM_Version::CpuidInfo::feature_flags() const {
  uint64_t result = 0;
  if (std_cpuid1_edx.bits.cmpxchg8 != 0)
    result |= CPU_CX8;
  if (std_cpuid1_edx.bits.cmov != 0)
    result |= CPU_CMOV;
  if (std_cpuid1_edx.bits.clflush != 0)
    result |= CPU_FLUSH;
#ifdef _LP64
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  assert ((result & CPU_FLUSH) != 0, "clflush should be available");
#endif
  // FXSR and MMX may be reported via the extended leaf on AMD-family
  // parts; accept either reporting location there.
  if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.fxsr != 0))
    result |= CPU_FXSR;
  // HT flag is set for multi-core processors also.
  if (threads_per_core() > 1)
    result |= CPU_HT;
  if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.mmx != 0))
    result |= CPU_MMX;
  if (std_cpuid1_edx.bits.sse != 0)
    result |= CPU_SSE;
  if (std_cpuid1_edx.bits.sse2 != 0)
    result |= CPU_SSE2;
  if (std_cpuid1_ecx.bits.sse3 != 0)
    result |= CPU_SSE3;
  if (std_cpuid1_ecx.bits.ssse3 != 0)
    result |= CPU_SSSE3;
  if (std_cpuid1_ecx.bits.sse4_1 != 0)
    result |= CPU_SSE4_1;
  if (std_cpuid1_ecx.bits.sse4_2 != 0)
    result |= CPU_SSE4_2;
  if (std_cpuid1_ecx.bits.popcnt != 0)
    result |= CPU_POPCNT;
  // AVX requires more than the raw feature bit: the OS must have enabled
  // extended state management (osxsave) and the SSE/YMM state components
  // in XCR0, otherwise the registers are not saved across context switches.
  if (std_cpuid1_ecx.bits.avx != 0 &&
      std_cpuid1_ecx.bits.osxsave != 0 &&
      xem_xcr0_eax.bits.sse != 0 &&
      xem_xcr0_eax.bits.ymm != 0) {
    result |= CPU_AVX;
    result |= CPU_VZEROUPPER;
    if (std_cpuid1_ecx.bits.f16c != 0)
      result |= CPU_F16C;
    if (sef_cpuid7_ebx.bits.avx2 != 0) {
      result |= CPU_AVX2;
      if (sef_cpuid7_ecx1_eax.bits.avx_ifma != 0)
        result |= CPU_AVX_IFMA;
    }
    if (sef_cpuid7_ecx.bits.gfni != 0)
        result |= CPU_GFNI;
    // AVX-512 additionally needs the opmask and ZMM state components
    // enabled in XCR0.
    if (sef_cpuid7_ebx.bits.avx512f != 0 &&
        xem_xcr0_eax.bits.opmask != 0 &&
        xem_xcr0_eax.bits.zmm512 != 0 &&
        xem_xcr0_eax.bits.zmm32 != 0) {
      result |= CPU_AVX512F;
      if (sef_cpuid7_ebx.bits.avx512cd != 0)
        result |= CPU_AVX512CD;
      if (sef_cpuid7_ebx.bits.avx512dq != 0)
        result |= CPU_AVX512DQ;
      if (sef_cpuid7_ebx.bits.avx512ifma != 0)
        result |= CPU_AVX512_IFMA;
      if (sef_cpuid7_ebx.bits.avx512pf != 0)
        result |= CPU_AVX512PF;
      if (sef_cpuid7_ebx.bits.avx512er != 0)
        result |= CPU_AVX512ER;
      if (sef_cpuid7_ebx.bits.avx512bw != 0)
        result |= CPU_AVX512BW;
      if (sef_cpuid7_ebx.bits.avx512vl != 0)
        result |= CPU_AVX512VL;
      if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        result |= CPU_AVX512_VPOPCNTDQ;
      if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        result |= CPU_AVX512_VPCLMULQDQ;
      if (sef_cpuid7_ecx.bits.vaes != 0)
        result |= CPU_AVX512_VAES;
      if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
        result |= CPU_AVX512_VNNI;
      if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        result |= CPU_AVX512_BITALG;
      if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        result |= CPU_AVX512_VBMI;
      if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        result |= CPU_AVX512_VBMI2;
    }
  }
  // Vendor-neutral miscellaneous features.
  if (std_cpuid1_ecx.bits.hv != 0)
    result |= CPU_HV;
  if (sef_cpuid7_ebx.bits.bmi1 != 0)
    result |= CPU_BMI1;
  if (std_cpuid1_edx.bits.tsc != 0)
    result |= CPU_TSC;
  if (ext_cpuid7_edx.bits.tsc_invariance != 0)
    result |= CPU_TSCINV_BIT;
  if (std_cpuid1_ecx.bits.aes != 0)
    result |= CPU_AES;
  if (sef_cpuid7_ebx.bits.erms != 0)
    result |= CPU_ERMS;
  if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    result |= CPU_FSRM;
  if (std_cpuid1_ecx.bits.clmul != 0)
    result |= CPU_CLMUL;
  if (sef_cpuid7_ebx.bits.rtm != 0)
    result |= CPU_RTM;
  if (sef_cpuid7_ebx.bits.adx != 0)
     result |= CPU_ADX;
  if (sef_cpuid7_ebx.bits.bmi2 != 0)
    result |= CPU_BMI2;
  if (sef_cpuid7_ebx.bits.sha != 0)
    result |= CPU_SHA;
  if (std_cpuid1_ecx.bits.fma != 0)
    result |= CPU_FMA;
  if (sef_cpuid7_ebx.bits.clflushopt != 0)
    result |= CPU_FLUSHOPT;
  if (ext_cpuid1_edx.bits.rdtscp != 0)
    result |= CPU_RDTSCP;
  if (sef_cpuid7_ecx.bits.rdpid != 0)
    result |= CPU_RDPID;

  // AMD|Hygon features.
  if (is_amd_family()) {
    if ((ext_cpuid1_edx.bits.tdnow != 0) ||
        (ext_cpuid1_ecx.bits.prefetchw != 0))
      result |= CPU_3DNOW_PREFETCH;
    if (ext_cpuid1_ecx.bits.lzcnt != 0)
      result |= CPU_LZCNT;
    if (ext_cpuid1_ecx.bits.sse4a != 0)
      result |= CPU_SSE4A;
  }

  // Intel features.
  if (is_intel()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
    if (sef_cpuid7_ebx.bits.clwb != 0) {
      result |= CPU_CLWB;
    }
    if (sef_cpuid7_edx.bits.serialize != 0)
      result |= CPU_SERIALIZE;
  }

  // ZX features.
  if (is_zx()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
  }

  // Protection key features.
  if (sef_cpuid7_ecx.bits.pku != 0) {
    result |= CPU_PKU;
  }
  if (sef_cpuid7_ecx.bits.ospke != 0) {
    result |= CPU_OSPKE;
  }

  // Control flow enforcement (CET) features.
  if (sef_cpuid7_ecx.bits.cet_ss != 0) {
    result |= CPU_CET_SS;
  }
  if (sef_cpuid7_edx.bits.cet_ibt != 0) {
    result |= CPU_CET_IBT;
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    result |= CPU_TSCINV;
  }

  return result;
}
3105 
3106 bool VM_Version::os_supports_avx_vectors() {
3107   bool retVal = false;
3108   int nreg = 2 LP64_ONLY(+2);
3109   if (supports_evex()) {
3110     // Verify that OS save/restore all bits of EVEX registers
3111     // during signal processing.
3112     retVal = true;
3113     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3114       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3115         retVal = false;
3116         break;
3117       }
3118     }
3119   } else if (supports_avx()) {
3120     // Verify that OS save/restore all bits of AVX registers
3121     // during signal processing.
3122     retVal = true;
3123     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3124       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3125         retVal = false;
3126         break;
3127       }
3128     }
3129     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3130     if (retVal == false) {
3131       // Verify that OS save/restore all bits of EVEX registers
3132       // during signal processing.
3133       retVal = true;
3134       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3135         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3136           retVal = false;
3137           break;
3138         }
3139       }
3140     }
3141   }
3142   return retVal;
3143 }
3144 
3145 uint VM_Version::cores_per_cpu() {
3146   uint result = 1;
3147   if (is_intel()) {
3148     bool supports_topology = supports_processor_topology();
3149     if (supports_topology) {
3150       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3151                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3152     }
3153     if (!supports_topology || result == 0) {
3154       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3155     }
3156   } else if (is_amd_family()) {
3157     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3158   } else if (is_zx()) {
3159     bool supports_topology = supports_processor_topology();
3160     if (supports_topology) {
3161       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3162                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3163     }
3164     if (!supports_topology || result == 0) {
3165       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3166     }
3167   }
3168   return result;
3169 }
3170 
3171 uint VM_Version::threads_per_core() {
3172   uint result = 1;
3173   if (is_intel() && supports_processor_topology()) {
3174     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3175   } else if (is_zx() && supports_processor_topology()) {
3176     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3177   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3178     if (cpu_family() >= 0x17) {
3179       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3180     } else {
3181       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3182                  cores_per_cpu();
3183     }
3184   }
3185   return (result == 0 ? 1 : result);
3186 }
3187 
3188 uint VM_Version::L1_line_size() {
3189   uint result = 0;
3190   if (is_intel()) {
3191     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3192   } else if (is_amd_family()) {
3193     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3194   } else if (is_zx()) {
3195     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3196   }
3197   if (result < 32) // not defined ?
3198     result = 32;   // 32 bytes by default on x86 and other x64
3199   return result;
3200 }
3201 
3202 bool VM_Version::is_intel_tsc_synched_at_init() {
3203   if (is_intel_family_core()) {
3204     uint32_t ext_model = extended_cpu_model();
3205     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3206         ext_model == CPU_MODEL_WESTMERE_EP    ||
3207         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3208         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3209       // <= 2-socket invariant tsc support. EX versions are usually used
3210       // in > 2-socket systems and likely don't synchronize tscs at
3211       // initialization.
3212       // Code that uses tsc values must be prepared for them to arbitrarily
3213       // jump forward or backward.
3214       return true;
3215     }
3216   }
3217   return false;
3218 }
3219 
3220 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3221   // Hardware prefetching (distance/size in bytes):
3222   // Pentium 3 -  64 /  32
3223   // Pentium 4 - 256 / 128
3224   // Athlon    -  64 /  32 ????
3225   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3226   // Core      - 128 /  64
3227   //
3228   // Software prefetching (distance in bytes / instruction with best score):
3229   // Pentium 3 - 128 / prefetchnta
3230   // Pentium 4 - 512 / prefetchnta
3231   // Athlon    - 128 / prefetchnta
3232   // Opteron   - 256 / prefetchnta
3233   // Core      - 256 / prefetchnta
3234   // It will be used only when AllocatePrefetchStyle > 0
3235 
3236   if (is_amd_family()) { // AMD | Hygon
3237     if (supports_sse2()) {
3238       return 256; // Opteron
3239     } else {
3240       return 128; // Athlon
3241     }
3242   } else { // Intel
3243     if (supports_sse3() && cpu_family() == 6) {
3244       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3245         return 192;
3246       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3247 #ifdef _LP64
3248         return 384;
3249 #else
3250         return 320;
3251 #endif
3252       }
3253     }
3254     if (supports_sse2()) {
3255       if (cpu_family() == 6) {
3256         return 256; // Pentium M, Core, Core2
3257       } else {
3258         return 512; // Pentium 4
3259       }
3260     } else {
3261       return 128; // Pentium 3 (and all other old CPUs)
3262     }
3263   }
3264 }
3265 
3266 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3267   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3268   switch (id) {
3269   case vmIntrinsics::_floatToFloat16:
3270   case vmIntrinsics::_float16ToFloat:
3271     if (!supports_float16()) {
3272       return false;
3273     }
3274     break;
3275   default:
3276     break;
3277   }
3278   return true;
3279 }