1 /*
   2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "jvm.h"
  32 #include "logging/log.hpp"
  33 #include "logging/logStream.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "runtime/globals_extension.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/checkedCast.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_FLAG
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = 0;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = 0;
  59 
  60 static BufferBlob* stub_blob;
  61 static const int stub_size = 2000;
  62 
  63 extern "C" {
  64   typedef void (*get_cpu_info_stub_t)(void*);
  65   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  66 }
  67 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  68 static detect_virt_stub_t detect_virt_stub = nullptr;
  69 
  70 #ifdef _LP64
  71 
  72 bool VM_Version::supports_clflush() {
  73   // clflush should always be available on x86_64
  74   // if not we are in real trouble because we rely on it
  75   // to flush the code cache.
  76   // Unfortunately, Assembler::clflush is currently called as part
  77   // of generation of the code cache flush routine. This happens
  78   // under Universe::init before the processor features are set
  79   // up. Assembler::flush calls this routine to check that clflush
  80   // is allowed. So, we give the caller a free pass if Universe init
  81   // is still in progress.
  82   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  83   return true;
  84 }
  85 #endif
  86 
  87 #define CPUID_STANDARD_FN   0x0
  88 #define CPUID_STANDARD_FN_1 0x1
  89 #define CPUID_STANDARD_FN_4 0x4
  90 #define CPUID_STANDARD_FN_B 0xb
  91 
  92 #define CPUID_EXTENDED_FN   0x80000000
  93 #define CPUID_EXTENDED_FN_1 0x80000001
  94 #define CPUID_EXTENDED_FN_2 0x80000002
  95 #define CPUID_EXTENDED_FN_3 0x80000003
  96 #define CPUID_EXTENDED_FN_4 0x80000004
  97 #define CPUID_EXTENDED_FN_7 0x80000007
  98 #define CPUID_EXTENDED_FN_8 0x80000008
  99 
 100 class VM_Version_StubGenerator: public StubCodeGenerator {
 101  public:
 102 
 103   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 104 
 105   address generate_get_cpu_info() {
 106     // Flags to test CPU type.
 107     const uint32_t HS_EFL_AC = 0x40000;
 108     const uint32_t HS_EFL_ID = 0x200000;
 109     // Values for when we don't have a CPUID instruction.
 110     const int      CPU_FAMILY_SHIFT = 8;
 111     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 112     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 113     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 114 
 115     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 116     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 117     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 118 
 119     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 120 #   define __ _masm->
 121 
 122     address start = __ pc();
 123 
 124     //
 125     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 126     //
 127     // LP64: rcx and rdx are first and second argument registers on windows
 128 
 129     __ push(rbp);
 130 #ifdef _LP64
 131     __ mov(rbp, c_rarg0); // cpuid_info address
 132 #else
 133     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 134 #endif
 135     __ push(rbx);
 136     __ push(rsi);
 137     __ pushf();          // preserve rbx, and flags
 138     __ pop(rax);
 139     __ push(rax);
 140     __ mov(rcx, rax);
 141     //
 142     // if we are unable to change the AC flag, we have a 386
 143     //
 144     __ xorl(rax, HS_EFL_AC);
 145     __ push(rax);
 146     __ popf();
 147     __ pushf();
 148     __ pop(rax);
 149     __ cmpptr(rax, rcx);
 150     __ jccb(Assembler::notEqual, detect_486);
 151 
 152     __ movl(rax, CPU_FAMILY_386);
 153     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 154     __ jmp(done);
 155 
 156     //
 157     // If we are unable to change the ID flag, we have a 486 which does
 158     // not support the "cpuid" instruction.
 159     //
 160     __ bind(detect_486);
 161     __ mov(rax, rcx);
 162     __ xorl(rax, HS_EFL_ID);
 163     __ push(rax);
 164     __ popf();
 165     __ pushf();
 166     __ pop(rax);
 167     __ cmpptr(rcx, rax);
 168     __ jccb(Assembler::notEqual, detect_586);
 169 
 170     __ bind(cpu486);
 171     __ movl(rax, CPU_FAMILY_486);
 172     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 173     __ jmp(done);
 174 
 175     //
 176     // At this point, we have a chip which supports the "cpuid" instruction
 177     //
 178     __ bind(detect_586);
 179     __ xorl(rax, rax);
 180     __ cpuid();
 181     __ orl(rax, rax);
 182     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 183                                         // value of at least 1, we give up and
 184                                         // assume a 486
 185     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 186     __ movl(Address(rsi, 0), rax);
 187     __ movl(Address(rsi, 4), rbx);
 188     __ movl(Address(rsi, 8), rcx);
 189     __ movl(Address(rsi,12), rdx);
 190 
 191     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 192     __ jccb(Assembler::belowEqual, std_cpuid4);
 193 
 194     //
 195     // cpuid(0xB) Processor Topology
 196     //
 197     __ movl(rax, 0xb);
 198     __ xorl(rcx, rcx);   // Threads level
 199     __ cpuid();
 200 
 201     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 202     __ movl(Address(rsi, 0), rax);
 203     __ movl(Address(rsi, 4), rbx);
 204     __ movl(Address(rsi, 8), rcx);
 205     __ movl(Address(rsi,12), rdx);
 206 
 207     __ movl(rax, 0xb);
 208     __ movl(rcx, 1);     // Cores level
 209     __ cpuid();
 210     __ push(rax);
 211     __ andl(rax, 0x1f);  // Determine if valid topology level
 212     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 213     __ andl(rax, 0xffff);
 214     __ pop(rax);
 215     __ jccb(Assembler::equal, std_cpuid4);
 216 
 217     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 218     __ movl(Address(rsi, 0), rax);
 219     __ movl(Address(rsi, 4), rbx);
 220     __ movl(Address(rsi, 8), rcx);
 221     __ movl(Address(rsi,12), rdx);
 222 
 223     __ movl(rax, 0xb);
 224     __ movl(rcx, 2);     // Packages level
 225     __ cpuid();
 226     __ push(rax);
 227     __ andl(rax, 0x1f);  // Determine if valid topology level
 228     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 229     __ andl(rax, 0xffff);
 230     __ pop(rax);
 231     __ jccb(Assembler::equal, std_cpuid4);
 232 
 233     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 234     __ movl(Address(rsi, 0), rax);
 235     __ movl(Address(rsi, 4), rbx);
 236     __ movl(Address(rsi, 8), rcx);
 237     __ movl(Address(rsi,12), rdx);
 238 
 239     //
 240     // cpuid(0x4) Deterministic cache params
 241     //
 242     __ bind(std_cpuid4);
 243     __ movl(rax, 4);
 244     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 245     __ jccb(Assembler::greater, std_cpuid1);
 246 
 247     __ xorl(rcx, rcx);   // L1 cache
 248     __ cpuid();
 249     __ push(rax);
 250     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 251     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 252     __ pop(rax);
 253     __ jccb(Assembler::equal, std_cpuid1);
 254 
 255     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 256     __ movl(Address(rsi, 0), rax);
 257     __ movl(Address(rsi, 4), rbx);
 258     __ movl(Address(rsi, 8), rcx);
 259     __ movl(Address(rsi,12), rdx);
 260 
 261     //
 262     // Standard cpuid(0x1)
 263     //
 264     __ bind(std_cpuid1);
 265     __ movl(rax, 1);
 266     __ cpuid();
 267     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 268     __ movl(Address(rsi, 0), rax);
 269     __ movl(Address(rsi, 4), rbx);
 270     __ movl(Address(rsi, 8), rcx);
 271     __ movl(Address(rsi,12), rdx);
 272 
 273     //
 274     // Check if OS has enabled XGETBV instruction to access XCR0
 275     // (OSXSAVE feature flag) and CPU supports AVX
 276     //
 277     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 278     __ cmpl(rcx, 0x18000000);
 279     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 280 
 281     //
 282     // XCR0, XFEATURE_ENABLED_MASK register
 283     //
 284     __ xorl(rcx, rcx);   // zero for XCR0 register
 285     __ xgetbv();
 286     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 287     __ movl(Address(rsi, 0), rax);
 288     __ movl(Address(rsi, 4), rdx);
 289 
 290     //
 291     // cpuid(0x7) Structured Extended Features
 292     //
 293     __ bind(sef_cpuid);
 294     __ movl(rax, 7);
 295     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 296     __ jccb(Assembler::greater, ext_cpuid);
 297 
 298     __ xorl(rcx, rcx);
 299     __ cpuid();
 300     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 301     __ movl(Address(rsi, 0), rax);
 302     __ movl(Address(rsi, 4), rbx);
 303     __ movl(Address(rsi, 8), rcx);
 304     __ movl(Address(rsi, 12), rdx);
 305 
 306     //
 307     // Extended cpuid(0x80000000)
 308     //
 309     __ bind(ext_cpuid);
 310     __ movl(rax, 0x80000000);
 311     __ cpuid();
 312     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 313     __ jcc(Assembler::belowEqual, done);
 314     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 315     __ jcc(Assembler::belowEqual, ext_cpuid1);
 316     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 317     __ jccb(Assembler::belowEqual, ext_cpuid5);
 318     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 319     __ jccb(Assembler::belowEqual, ext_cpuid7);
 320     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 321     __ jccb(Assembler::belowEqual, ext_cpuid8);
 322     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 323     __ jccb(Assembler::below, ext_cpuid8);
 324     //
 325     // Extended cpuid(0x8000001E)
 326     //
 327     __ movl(rax, 0x8000001E);
 328     __ cpuid();
 329     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 330     __ movl(Address(rsi, 0), rax);
 331     __ movl(Address(rsi, 4), rbx);
 332     __ movl(Address(rsi, 8), rcx);
 333     __ movl(Address(rsi,12), rdx);
 334 
 335     //
 336     // Extended cpuid(0x80000008)
 337     //
 338     __ bind(ext_cpuid8);
 339     __ movl(rax, 0x80000008);
 340     __ cpuid();
 341     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 342     __ movl(Address(rsi, 0), rax);
 343     __ movl(Address(rsi, 4), rbx);
 344     __ movl(Address(rsi, 8), rcx);
 345     __ movl(Address(rsi,12), rdx);
 346 
 347     //
 348     // Extended cpuid(0x80000007)
 349     //
 350     __ bind(ext_cpuid7);
 351     __ movl(rax, 0x80000007);
 352     __ cpuid();
 353     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 354     __ movl(Address(rsi, 0), rax);
 355     __ movl(Address(rsi, 4), rbx);
 356     __ movl(Address(rsi, 8), rcx);
 357     __ movl(Address(rsi,12), rdx);
 358 
 359     //
 360     // Extended cpuid(0x80000005)
 361     //
 362     __ bind(ext_cpuid5);
 363     __ movl(rax, 0x80000005);
 364     __ cpuid();
 365     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 366     __ movl(Address(rsi, 0), rax);
 367     __ movl(Address(rsi, 4), rbx);
 368     __ movl(Address(rsi, 8), rcx);
 369     __ movl(Address(rsi,12), rdx);
 370 
 371     //
 372     // Extended cpuid(0x80000001)
 373     //
 374     __ bind(ext_cpuid1);
 375     __ movl(rax, 0x80000001);
 376     __ cpuid();
 377     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 378     __ movl(Address(rsi, 0), rax);
 379     __ movl(Address(rsi, 4), rbx);
 380     __ movl(Address(rsi, 8), rcx);
 381     __ movl(Address(rsi,12), rdx);
 382 
 383     //
 384     // Check if OS has enabled XGETBV instruction to access XCR0
 385     // (OSXSAVE feature flag) and CPU supports AVX
 386     //
 387     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 388     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 389     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 390     __ cmpl(rcx, 0x18000000);
 391     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 392 
 393     __ movl(rax, 0x6);
 394     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 395     __ cmpl(rax, 0x6);
 396     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 397 
 398     // we need to bridge farther than imm8, so we use this island as a thunk
 399     __ bind(done);
 400     __ jmp(wrapup);
 401 
 402     __ bind(start_simd_check);
 403     //
 404     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 405     // registers are not restored after a signal processing.
 406     // Generate SEGV here (reference through null)
 407     // and check upper YMM/ZMM bits after it.
 408     //
 409     int saved_useavx = UseAVX;
 410     int saved_usesse = UseSSE;
 411 
 412     // If UseAVX is uninitialized or is set by the user to include EVEX
 413     if (use_evex) {
 414       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 415       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 416       __ movl(rax, 0x10000);
 417       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 418       __ cmpl(rax, 0x10000);
 419       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 420       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 421       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 422       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 423       __ movl(rax, 0xE0);
 424       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 425       __ cmpl(rax, 0xE0);
 426       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 427 
 428       if (FLAG_IS_DEFAULT(UseAVX)) {
 429         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 430         __ movl(rax, Address(rsi, 0));
 431         __ cmpl(rax, 0x50654);              // If it is Skylake
 432         __ jcc(Assembler::equal, legacy_setup);
 433       }
 434       // EVEX setup: run in lowest evex mode
 435       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 436       UseAVX = 3;
 437       UseSSE = 2;
 438 #ifdef _WINDOWS
 439       // xmm5-xmm15 are not preserved by caller on windows
 440       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 441       __ subptr(rsp, 64);
 442       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 443 #ifdef _LP64
 444       __ subptr(rsp, 64);
 445       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 446       __ subptr(rsp, 64);
 447       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 448 #endif // _LP64
 449 #endif // _WINDOWS
 450 
 451       // load value into all 64 bytes of zmm7 register
 452       __ movl(rcx, VM_Version::ymm_test_value());
 453       __ movdl(xmm0, rcx);
 454       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 455       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 456 #ifdef _LP64
 457       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 458       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 459 #endif
 460       VM_Version::clean_cpuFeatures();
 461       __ jmp(save_restore_except);
 462     }
 463 
 464     __ bind(legacy_setup);
 465     // AVX setup
 466     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 467     UseAVX = 1;
 468     UseSSE = 2;
 469 #ifdef _WINDOWS
 470     __ subptr(rsp, 32);
 471     __ vmovdqu(Address(rsp, 0), xmm7);
 472 #ifdef _LP64
 473     __ subptr(rsp, 32);
 474     __ vmovdqu(Address(rsp, 0), xmm8);
 475     __ subptr(rsp, 32);
 476     __ vmovdqu(Address(rsp, 0), xmm15);
 477 #endif // _LP64
 478 #endif // _WINDOWS
 479 
 480     // load value into all 32 bytes of ymm7 register
 481     __ movl(rcx, VM_Version::ymm_test_value());
 482 
 483     __ movdl(xmm0, rcx);
 484     __ pshufd(xmm0, xmm0, 0x00);
 485     __ vinsertf128_high(xmm0, xmm0);
 486     __ vmovdqu(xmm7, xmm0);
 487 #ifdef _LP64
 488     __ vmovdqu(xmm8, xmm0);
 489     __ vmovdqu(xmm15, xmm0);
 490 #endif
 491     VM_Version::clean_cpuFeatures();
 492 
 493     __ bind(save_restore_except);
 494     __ xorl(rsi, rsi);
 495     VM_Version::set_cpuinfo_segv_addr(__ pc());
 496     // Generate SEGV
 497     __ movl(rax, Address(rsi, 0));
 498 
 499     VM_Version::set_cpuinfo_cont_addr(__ pc());
 500     // Returns here after signal. Save xmm0 to check it later.
 501 
 502     // If UseAVX is uninitialized or is set by the user to include EVEX
 503     if (use_evex) {
 504       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 505       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 506       __ movl(rax, 0x10000);
 507       __ andl(rax, Address(rsi, 4));
 508       __ cmpl(rax, 0x10000);
 509       __ jcc(Assembler::notEqual, legacy_save_restore);
 510       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 511       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 512       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 513       __ movl(rax, 0xE0);
 514       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 515       __ cmpl(rax, 0xE0);
 516       __ jcc(Assembler::notEqual, legacy_save_restore);
 517 
 518       if (FLAG_IS_DEFAULT(UseAVX)) {
 519         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 520         __ movl(rax, Address(rsi, 0));
 521         __ cmpl(rax, 0x50654);              // If it is Skylake
 522         __ jcc(Assembler::equal, legacy_save_restore);
 523       }
 524       // EVEX check: run in lowest evex mode
 525       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 526       UseAVX = 3;
 527       UseSSE = 2;
 528       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 529       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 530       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 531 #ifdef _LP64
 532       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 533       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 534 #endif
 535 
 536 #ifdef _WINDOWS
 537 #ifdef _LP64
 538       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 539       __ addptr(rsp, 64);
 540       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 541       __ addptr(rsp, 64);
 542 #endif // _LP64
 543       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 544       __ addptr(rsp, 64);
 545 #endif // _WINDOWS
 546       generate_vzeroupper(wrapup);
 547       VM_Version::clean_cpuFeatures();
 548       UseAVX = saved_useavx;
 549       UseSSE = saved_usesse;
 550       __ jmp(wrapup);
 551    }
 552 
 553     __ bind(legacy_save_restore);
 554     // AVX check
 555     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 556     UseAVX = 1;
 557     UseSSE = 2;
 558     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 559     __ vmovdqu(Address(rsi, 0), xmm0);
 560     __ vmovdqu(Address(rsi, 32), xmm7);
 561 #ifdef _LP64
 562     __ vmovdqu(Address(rsi, 64), xmm8);
 563     __ vmovdqu(Address(rsi, 96), xmm15);
 564 #endif
 565 
 566 #ifdef _WINDOWS
 567 #ifdef _LP64
 568     __ vmovdqu(xmm15, Address(rsp, 0));
 569     __ addptr(rsp, 32);
 570     __ vmovdqu(xmm8, Address(rsp, 0));
 571     __ addptr(rsp, 32);
 572 #endif // _LP64
 573     __ vmovdqu(xmm7, Address(rsp, 0));
 574     __ addptr(rsp, 32);
 575 #endif // _WINDOWS
 576     generate_vzeroupper(wrapup);
 577     VM_Version::clean_cpuFeatures();
 578     UseAVX = saved_useavx;
 579     UseSSE = saved_usesse;
 580 
 581     __ bind(wrapup);
 582     __ popf();
 583     __ pop(rsi);
 584     __ pop(rbx);
 585     __ pop(rbp);
 586     __ ret(0);
 587 
 588 #   undef __
 589 
 590     return start;
 591   };
 592   void generate_vzeroupper(Label& L_wrapup) {
 593 #   define __ _masm->
 594     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 595     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 596     __ jcc(Assembler::notEqual, L_wrapup);
 597     __ movl(rcx, 0x0FFF0FF0);
 598     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 599     __ andl(rcx, Address(rsi, 0));
 600     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 601     __ jcc(Assembler::equal, L_wrapup);
 602     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 603     __ jcc(Assembler::equal, L_wrapup);
 604     // vzeroupper() will use a pre-computed instruction sequence that we
 605     // can't compute until after we've determined CPU capabilities. Use
 606     // uncached variant here directly to be able to bootstrap correctly
 607     __ vzeroupper_uncached();
 608 #   undef __
 609   }
 610   address generate_detect_virt() {
 611     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 612 #   define __ _masm->
 613 
 614     address start = __ pc();
 615 
 616     // Evacuate callee-saved registers
 617     __ push(rbp);
 618     __ push(rbx);
 619     __ push(rsi); // for Windows
 620 
 621 #ifdef _LP64
 622     __ mov(rax, c_rarg0); // CPUID leaf
 623     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 624 #else
 625     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 626     __ movptr(rsi, Address(rsp, 20)); // register array address
 627 #endif
 628 
 629     __ cpuid();
 630 
 631     // Store result to register array
 632     __ movl(Address(rsi,  0), rax);
 633     __ movl(Address(rsi,  4), rbx);
 634     __ movl(Address(rsi,  8), rcx);
 635     __ movl(Address(rsi, 12), rdx);
 636 
 637     // Epilogue
 638     __ pop(rsi);
 639     __ pop(rbx);
 640     __ pop(rbp);
 641     __ ret(0);
 642 
 643 #   undef __
 644 
 645     return start;
 646   };
 647 
 648 
 649   address generate_getCPUIDBrandString(void) {
 650     // Flags to test CPU type.
 651     const uint32_t HS_EFL_AC           = 0x40000;
 652     const uint32_t HS_EFL_ID           = 0x200000;
 653     // Values for when we don't have a CPUID instruction.
 654     const int      CPU_FAMILY_SHIFT = 8;
 655     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 656     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 657 
 658     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 659 
 660     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 661 #   define __ _masm->
 662 
 663     address start = __ pc();
 664 
 665     //
 666     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 667     //
 668     // LP64: rcx and rdx are first and second argument registers on windows
 669 
 670     __ push(rbp);
 671 #ifdef _LP64
 672     __ mov(rbp, c_rarg0); // cpuid_info address
 673 #else
 674     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 675 #endif
 676     __ push(rbx);
 677     __ push(rsi);
 678     __ pushf();          // preserve rbx, and flags
 679     __ pop(rax);
 680     __ push(rax);
 681     __ mov(rcx, rax);
 682     //
 683     // if we are unable to change the AC flag, we have a 386
 684     //
 685     __ xorl(rax, HS_EFL_AC);
 686     __ push(rax);
 687     __ popf();
 688     __ pushf();
 689     __ pop(rax);
 690     __ cmpptr(rax, rcx);
 691     __ jccb(Assembler::notEqual, detect_486);
 692 
 693     __ movl(rax, CPU_FAMILY_386);
 694     __ jmp(done);
 695 
 696     //
 697     // If we are unable to change the ID flag, we have a 486 which does
 698     // not support the "cpuid" instruction.
 699     //
 700     __ bind(detect_486);
 701     __ mov(rax, rcx);
 702     __ xorl(rax, HS_EFL_ID);
 703     __ push(rax);
 704     __ popf();
 705     __ pushf();
 706     __ pop(rax);
 707     __ cmpptr(rcx, rax);
 708     __ jccb(Assembler::notEqual, detect_586);
 709 
 710     __ bind(cpu486);
 711     __ movl(rax, CPU_FAMILY_486);
 712     __ jmp(done);
 713 
 714     //
 715     // At this point, we have a chip which supports the "cpuid" instruction
 716     //
 717     __ bind(detect_586);
 718     __ xorl(rax, rax);
 719     __ cpuid();
 720     __ orl(rax, rax);
 721     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 722                                         // value of at least 1, we give up and
 723                                         // assume a 486
 724 
 725     //
 726     // Extended cpuid(0x80000000) for processor brand string detection
 727     //
 728     __ bind(ext_cpuid);
 729     __ movl(rax, CPUID_EXTENDED_FN);
 730     __ cpuid();
 731     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 732     __ jcc(Assembler::below, done);
 733 
 734     //
 735     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 736     //
 737     __ movl(rax, CPUID_EXTENDED_FN_2);
 738     __ cpuid();
 739     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 740     __ movl(Address(rsi, 0), rax);
 741     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 742     __ movl(Address(rsi, 0), rbx);
 743     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 744     __ movl(Address(rsi, 0), rcx);
 745     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 746     __ movl(Address(rsi,0), rdx);
 747 
 748     //
 749     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 750     //
 751     __ movl(rax, CPUID_EXTENDED_FN_3);
 752     __ cpuid();
 753     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 754     __ movl(Address(rsi, 0), rax);
 755     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 756     __ movl(Address(rsi, 0), rbx);
 757     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 758     __ movl(Address(rsi, 0), rcx);
 759     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 760     __ movl(Address(rsi,0), rdx);
 761 
 762     //
 763     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 764     //
 765     __ movl(rax, CPUID_EXTENDED_FN_4);
 766     __ cpuid();
 767     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 768     __ movl(Address(rsi, 0), rax);
 769     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 770     __ movl(Address(rsi, 0), rbx);
 771     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 772     __ movl(Address(rsi, 0), rcx);
 773     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 774     __ movl(Address(rsi,0), rdx);
 775 
 776     //
 777     // return
 778     //
 779     __ bind(done);
 780     __ popf();
 781     __ pop(rsi);
 782     __ pop(rbx);
 783     __ pop(rbp);
 784     __ ret(0);
 785 
 786 #   undef __
 787 
 788     return start;
 789   };
 790 };
 791 
 792 void VM_Version::get_processor_features() {
 793 
 794   _cpu = 4; // 486 by default
 795   _model = 0;
 796   _stepping = 0;
 797   _features = 0;
 798   _logical_processors_per_package = 1;
 799   // i486 internal cache is both I&D and has a 16-byte line size
 800   _L1_data_cache_line_size = 16;
 801 
 802   // Get raw processor info
 803 
 804   get_cpu_info_stub(&_cpuid_info);
 805 
 806   assert_is_initialized();
 807   _cpu = extended_cpu_family();
 808   _model = extended_cpu_model();
 809   _stepping = cpu_stepping();
 810 
 811   if (cpu_family() > 4) { // it supports CPUID
 812     _features = feature_flags(); // These can be changed by VM settings
 813     _cpu_features = _features;   // Preserve features
 814     // Logical processors are only available on P4s and above,
 815     // and only if hyperthreading is available.
 816     _logical_processors_per_package = logical_processor_count();
 817     _L1_data_cache_line_size = L1_line_size();
 818   }
 819 
 820   // xchg and xadd instructions
 821   _supports_atomic_getset4 = true;
 822   _supports_atomic_getadd4 = true;
 823   LP64_ONLY(_supports_atomic_getset8 = true);
 824   LP64_ONLY(_supports_atomic_getadd8 = true);
 825 
 826 #ifdef _LP64
 827   // OS should support SSE for x64 and hardware should support at least SSE2.
 828   if (!VM_Version::supports_sse2()) {
 829     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 830   }
 831   // in 64 bit the use of SSE2 is the minimum
 832   if (UseSSE < 2) UseSSE = 2;
 833 #endif
 834 
 835 #ifdef AMD64
 836   // flush_icache_stub have to be generated first.
 837   // That is why Icache line size is hard coded in ICache class,
 838   // see icache_x86.hpp. It is also the reason why we can't use
 839   // clflush instruction in 32-bit VM since it could be running
 840   // on CPU which does not support it.
 841   //
 842   // The only thing we can do is to verify that flushed
 843   // ICache::line_size has correct value.
 844   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 845   // clflush_size is size in quadwords (8 bytes).
 846   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 847 #endif
 848 
 849 #ifdef _LP64
 850   // assigning this field effectively enables Unsafe.writebackMemory()
 851   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 852   // that is only implemented on x86_64 and only if the OS plays ball
 853   if (os::supports_map_sync()) {
 854     // publish data cache line flush size to generic field, otherwise
 855     // let if default to zero thereby disabling writeback
 856     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 857   }
 858 #endif
 859 
 860   // Check if processor has Intel Ecore
 861   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 862     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
 863     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 864   }
 865 
 866   if (UseSSE < 4) {
 867     _features &= ~CPU_SSE4_1;
 868     _features &= ~CPU_SSE4_2;
 869   }
 870 
 871   if (UseSSE < 3) {
 872     _features &= ~CPU_SSE3;
 873     _features &= ~CPU_SSSE3;
 874     _features &= ~CPU_SSE4A;
 875   }
 876 
 877   if (UseSSE < 2)
 878     _features &= ~CPU_SSE2;
 879 
 880   if (UseSSE < 1)
 881     _features &= ~CPU_SSE;
 882 
 883   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 884   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 885     UseAVX = 0;
 886   }
 887 
 888   // UseSSE is set to the smaller of what hardware supports and what
 889   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 890   // older Pentiums which do not support it.
 891   int use_sse_limit = 0;
 892   if (UseSSE > 0) {
 893     if (UseSSE > 3 && supports_sse4_1()) {
 894       use_sse_limit = 4;
 895     } else if (UseSSE > 2 && supports_sse3()) {
 896       use_sse_limit = 3;
 897     } else if (UseSSE > 1 && supports_sse2()) {
 898       use_sse_limit = 2;
 899     } else if (UseSSE > 0 && supports_sse()) {
 900       use_sse_limit = 1;
 901     } else {
 902       use_sse_limit = 0;
 903     }
 904   }
 905   if (FLAG_IS_DEFAULT(UseSSE)) {
 906     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 907   } else if (UseSSE > use_sse_limit) {
 908     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 909     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 910   }
 911 
 912   // first try initial setting and detect what we can support
 913   int use_avx_limit = 0;
 914   if (UseAVX > 0) {
 915     if (UseSSE < 4) {
 916       // Don't use AVX if SSE is unavailable or has been disabled.
 917       use_avx_limit = 0;
 918     } else if (UseAVX > 2 && supports_evex()) {
 919       use_avx_limit = 3;
 920     } else if (UseAVX > 1 && supports_avx2()) {
 921       use_avx_limit = 2;
 922     } else if (UseAVX > 0 && supports_avx()) {
 923       use_avx_limit = 1;
 924     } else {
 925       use_avx_limit = 0;
 926     }
 927   }
 928   if (FLAG_IS_DEFAULT(UseAVX)) {
 929     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 930     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 931       FLAG_SET_DEFAULT(UseAVX, 2);
 932     } else {
 933       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 934     }
 935   }
 936   if (UseAVX > use_avx_limit) {
 937     if (UseSSE < 4) {
 938       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 939     } else {
 940       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 941     }
 942     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 943   }
 944 
 945   if (UseAVX < 3) {
 946     _features &= ~CPU_AVX512F;
 947     _features &= ~CPU_AVX512DQ;
 948     _features &= ~CPU_AVX512CD;
 949     _features &= ~CPU_AVX512BW;
 950     _features &= ~CPU_AVX512VL;
 951     _features &= ~CPU_AVX512_VPOPCNTDQ;
 952     _features &= ~CPU_AVX512_VPCLMULQDQ;
 953     _features &= ~CPU_AVX512_VAES;
 954     _features &= ~CPU_AVX512_VNNI;
 955     _features &= ~CPU_AVX512_VBMI;
 956     _features &= ~CPU_AVX512_VBMI2;
 957     _features &= ~CPU_AVX512_BITALG;
 958     _features &= ~CPU_AVX512_IFMA;
 959   }
 960 
 961   if (UseAVX < 2)
 962     _features &= ~CPU_AVX2;
 963 
 964   if (UseAVX < 1) {
 965     _features &= ~CPU_AVX;
 966     _features &= ~CPU_VZEROUPPER;
 967     _features &= ~CPU_F16C;
 968   }
 969 
 970   if (logical_processors_per_package() == 1) {
 971     // HT processor could be installed on a system which doesn't support HT.
 972     _features &= ~CPU_HT;
 973   }
 974 
 975   if (is_intel()) { // Intel cpus specific settings
 976     if (is_knights_family()) {
 977       _features &= ~CPU_VZEROUPPER;
 978       _features &= ~CPU_AVX512BW;
 979       _features &= ~CPU_AVX512VL;
 980       _features &= ~CPU_AVX512DQ;
 981       _features &= ~CPU_AVX512_VNNI;
 982       _features &= ~CPU_AVX512_VAES;
 983       _features &= ~CPU_AVX512_VPOPCNTDQ;
 984       _features &= ~CPU_AVX512_VPCLMULQDQ;
 985       _features &= ~CPU_AVX512_VBMI;
 986       _features &= ~CPU_AVX512_VBMI2;
 987       _features &= ~CPU_CLWB;
 988       _features &= ~CPU_FLUSHOPT;
 989       _features &= ~CPU_GFNI;
 990       _features &= ~CPU_AVX512_BITALG;
 991       _features &= ~CPU_AVX512_IFMA;
 992     }
 993   }
 994 
 995   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
 996     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
 997   } else {
 998     _has_intel_jcc_erratum = IntelJccErratumMitigation;
 999   }
1000 
1001   char buf[1024];
1002   int res = jio_snprintf(
1003               buf, sizeof(buf),
1004               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1005               cores_per_cpu(), threads_per_core(),
1006               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1007   assert(res > 0, "not enough temporary space allocated");
1008   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1009 
1010   _features_string = os::strdup(buf);
1011 
1012   // Use AES instructions if available.
1013   if (supports_aes()) {
1014     if (FLAG_IS_DEFAULT(UseAES)) {
1015       FLAG_SET_DEFAULT(UseAES, true);
1016     }
1017     if (!UseAES) {
1018       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1019         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1020       }
1021       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1022     } else {
1023       if (UseSSE > 2) {
1024         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1025           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1026         }
1027       } else {
1028         // The AES intrinsic stubs require AES instruction support (of course)
1029         // but also require sse3 mode or higher for instructions it use.
1030         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1031           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1032         }
1033         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1034       }
1035 
1036       // --AES-CTR begins--
1037       if (!UseAESIntrinsics) {
1038         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1039           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1040           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1041         }
1042       } else {
1043         if (supports_sse4_1()) {
1044           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1045             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1046           }
1047         } else {
1048            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1049            // but also require sse4.1 mode or higher for instructions it use.
1050           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1051              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1052            }
1053            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1054         }
1055       }
1056       // --AES-CTR ends--
1057     }
1058   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1059     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1060       warning("AES instructions are not available on this CPU");
1061       FLAG_SET_DEFAULT(UseAES, false);
1062     }
1063     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1064       warning("AES intrinsics are not available on this CPU");
1065       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1066     }
1067     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1068       warning("AES-CTR intrinsics are not available on this CPU");
1069       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1070     }
1071   }
1072 
1073   // Use CLMUL instructions if available.
1074   if (supports_clmul()) {
1075     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1076       UseCLMUL = true;
1077     }
1078   } else if (UseCLMUL) {
1079     if (!FLAG_IS_DEFAULT(UseCLMUL))
1080       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1081     FLAG_SET_DEFAULT(UseCLMUL, false);
1082   }
1083 
1084   if (UseCLMUL && (UseSSE > 2)) {
1085     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1086       UseCRC32Intrinsics = true;
1087     }
1088   } else if (UseCRC32Intrinsics) {
1089     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1090       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1091     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1092   }
1093 
1094 #ifdef _LP64
1095   if (supports_avx2()) {
1096     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1097       UseAdler32Intrinsics = true;
1098     }
1099   } else if (UseAdler32Intrinsics) {
1100     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1101       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1102     }
1103     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1104   }
1105 #else
1106   if (UseAdler32Intrinsics) {
1107     warning("Adler32Intrinsics not available on this CPU.");
1108     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1109   }
1110 #endif
1111 
1112   if (supports_sse4_2() && supports_clmul()) {
1113     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1114       UseCRC32CIntrinsics = true;
1115     }
1116   } else if (UseCRC32CIntrinsics) {
1117     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1118       warning("CRC32C intrinsics are not available on this CPU");
1119     }
1120     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1121   }
1122 
1123   // GHASH/GCM intrinsics
1124   if (UseCLMUL && (UseSSE > 2)) {
1125     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1126       UseGHASHIntrinsics = true;
1127     }
1128   } else if (UseGHASHIntrinsics) {
1129     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1130       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1131     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1132   }
1133 
1134 #ifdef _LP64
1135   // ChaCha20 Intrinsics
1136   // As long as the system supports AVX as a baseline we can do a
1137   // SIMD-enabled block function.  StubGenerator makes the determination
1138   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1139   // version.
1140   if (UseAVX >= 1) {
1141       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1142           UseChaCha20Intrinsics = true;
1143       }
1144   } else if (UseChaCha20Intrinsics) {
1145       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1146           warning("ChaCha20 intrinsic requires AVX instructions");
1147       }
1148       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1149   }
1150 #else
1151   // No support currently for ChaCha20 intrinsics on 32-bit platforms
1152   if (UseChaCha20Intrinsics) {
1153       warning("ChaCha20 intrinsics are not available on this CPU.");
1154       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1155   }
1156 #endif // _LP64
1157 
1158   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1159   if (UseAVX >= 2) {
1160     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1161       UseBASE64Intrinsics = true;
1162     }
1163   } else if (UseBASE64Intrinsics) {
1164      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1165       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1166     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1167   }
1168 
1169   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1170     if (FLAG_IS_DEFAULT(UseFMA)) {
1171       UseFMA = true;
1172     }
1173   } else if (UseFMA) {
1174     warning("FMA instructions are not available on this CPU");
1175     FLAG_SET_DEFAULT(UseFMA, false);
1176   }
1177 
1178   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1179     UseMD5Intrinsics = true;
1180   }
1181 
1182   if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
1183     if (FLAG_IS_DEFAULT(UseSHA)) {
1184       UseSHA = true;
1185     }
1186   } else if (UseSHA) {
1187     warning("SHA instructions are not available on this CPU");
1188     FLAG_SET_DEFAULT(UseSHA, false);
1189   }
1190 
1191   if (supports_sha() && supports_sse4_1() && UseSHA) {
1192     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1193       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1194     }
1195   } else if (UseSHA1Intrinsics) {
1196     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1197     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1198   }
1199 
1200   if (supports_sse4_1() && UseSHA) {
1201     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1202       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1203     }
1204   } else if (UseSHA256Intrinsics) {
1205     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1206     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1207   }
1208 
1209 #ifdef _LP64
1210   // These are only supported on 64-bit
1211   if (UseSHA && supports_avx2() && supports_bmi2()) {
1212     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1213       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1214     }
1215   } else
1216 #endif
1217   if (UseSHA512Intrinsics) {
1218     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1219     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1220   }
1221 
1222   if (UseSHA3Intrinsics) {
1223     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1224     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1225   }
1226 
1227   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1228     FLAG_SET_DEFAULT(UseSHA, false);
1229   }
1230 
1231   if (!supports_rtm() && UseRTMLocking) {
1232     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1233   }
1234 
1235 #if INCLUDE_RTM_OPT
1236   if (UseRTMLocking) {
1237     if (!CompilerConfig::is_c2_enabled()) {
1238       // Only C2 does RTM locking optimization.
1239       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1240     }
1241     if (is_intel_family_core()) {
1242       if ((_model == CPU_MODEL_HASWELL_E3) ||
1243           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1244           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1245         // currently a collision between SKL and HSW_E3
1246         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1247           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1248                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1249         } else {
1250           warning("UseRTMLocking is only available as experimental option on this platform.");
1251         }
1252       }
1253     }
1254     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1255       // RTM locking should be used only for applications with
1256       // high lock contention. For now we do not use it by default.
1257       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1258     }
1259   } else { // !UseRTMLocking
1260     if (UseRTMForStackLocks) {
1261       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1262         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1263       }
1264       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1265     }
1266     if (UseRTMDeopt) {
1267       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1268     }
1269     if (PrintPreciseRTMLockingStatistics) {
1270       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1271     }
1272   }
1273 #else
1274   if (UseRTMLocking) {
1275     // Only C2 does RTM locking optimization.
1276     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1277   }
1278 #endif
1279 
1280 #ifdef COMPILER2
1281   if (UseFPUForSpilling) {
1282     if (UseSSE < 2) {
1283       // Only supported with SSE2+
1284       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1285     }
1286   }
1287 #endif
1288 
1289 #if COMPILER2_OR_JVMCI
1290   int max_vector_size = 0;
1291   if (UseSSE < 2) {
1292     // Vectors (in XMM) are only supported with SSE2+
1293     // SSE is always 2 on x64.
1294     max_vector_size = 0;
1295   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1296     // 16 byte vectors (in XMM) are supported with SSE2+
1297     max_vector_size = 16;
1298   } else if (UseAVX == 1 || UseAVX == 2) {
1299     // 32 bytes vectors (in YMM) are only supported with AVX+
1300     max_vector_size = 32;
1301   } else if (UseAVX > 2) {
1302     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1303     max_vector_size = 64;
1304   }
1305 
1306 #ifdef _LP64
1307   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1308 #else
1309   int min_vector_size = 0;
1310 #endif
1311 
1312   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1313     if (MaxVectorSize < min_vector_size) {
1314       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1315       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1316     }
1317     if (MaxVectorSize > max_vector_size) {
1318       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1319       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1320     }
1321     if (!is_power_of_2(MaxVectorSize)) {
1322       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1323       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1324     }
1325   } else {
1326     // If default, use highest supported configuration
1327     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1328   }
1329 
1330 #if defined(COMPILER2) && defined(ASSERT)
1331   if (MaxVectorSize > 0) {
1332     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1333       tty->print_cr("State of YMM registers after signal handle:");
1334       int nreg = 2 LP64_ONLY(+2);
1335       const char* ymm_name[4] = {"0", "7", "8", "15"};
1336       for (int i = 0; i < nreg; i++) {
1337         tty->print("YMM%s:", ymm_name[i]);
1338         for (int j = 7; j >=0; j--) {
1339           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1340         }
1341         tty->cr();
1342       }
1343     }
1344   }
1345 #endif // COMPILER2 && ASSERT
1346 
1347 #ifdef _LP64
1348   if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
1349     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1350       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1351     }
1352   } else
1353 #endif
1354   if (UsePoly1305Intrinsics) {
1355     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1356     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1357   }
1358 
1359 #ifdef _LP64
1360   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1361     UseMultiplyToLenIntrinsic = true;
1362   }
1363   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1364     UseSquareToLenIntrinsic = true;
1365   }
1366   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1367     UseMulAddIntrinsic = true;
1368   }
1369   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1370     UseMontgomeryMultiplyIntrinsic = true;
1371   }
1372   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1373     UseMontgomerySquareIntrinsic = true;
1374   }
1375 #else
1376   if (UseMultiplyToLenIntrinsic) {
1377     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1378       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1379     }
1380     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1381   }
1382   if (UseMontgomeryMultiplyIntrinsic) {
1383     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1384       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1385     }
1386     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1387   }
1388   if (UseMontgomerySquareIntrinsic) {
1389     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1390       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1391     }
1392     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1393   }
1394   if (UseSquareToLenIntrinsic) {
1395     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1396       warning("squareToLen intrinsic is not available in 32-bit VM");
1397     }
1398     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1399   }
1400   if (UseMulAddIntrinsic) {
1401     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1402       warning("mulAdd intrinsic is not available in 32-bit VM");
1403     }
1404     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1405   }
1406 #endif // _LP64
1407 #endif // COMPILER2_OR_JVMCI
1408 
1409   // On new cpus instructions which update whole XMM register should be used
1410   // to prevent partial register stall due to dependencies on high half.
1411   //
1412   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1413   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1414   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1415   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1416 
1417 
1418   if (is_zx()) { // ZX cpus specific settings
1419     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1420       UseStoreImmI16 = false; // don't use it on ZX cpus
1421     }
1422     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1423       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1424         // Use it on all ZX cpus
1425         UseAddressNop = true;
1426       }
1427     }
1428     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1429       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1430     }
1431     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1432       if (supports_sse3()) {
1433         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1434       } else {
1435         UseXmmRegToRegMoveAll = false;
1436       }
1437     }
1438     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1439 #ifdef COMPILER2
1440       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1441         // For new ZX cpus do the next optimization:
1442         // don't align the beginning of a loop if there are enough instructions
1443         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1444         // in current fetch line (OptoLoopAlignment) or the padding
1445         // is big (> MaxLoopPad).
1446         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1447         // generated NOP instructions. 11 is the largest size of one
1448         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1449         MaxLoopPad = 11;
1450       }
1451 #endif // COMPILER2
1452       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1453         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1454       }
1455       if (supports_sse4_2()) { // new ZX cpus
1456         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1457           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1458         }
1459       }
1460       if (supports_sse4_2()) {
1461         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1462           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1463         }
1464       } else {
1465         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1466           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1467         }
1468         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1469       }
1470     }
1471 
1472     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1473       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1474     }
1475   }
1476 
1477   if (is_amd_family()) { // AMD cpus specific settings
1478     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1479       // Use it on new AMD cpus starting from Opteron.
1480       UseAddressNop = true;
1481     }
1482     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1483       // Use it on new AMD cpus starting from Opteron.
1484       UseNewLongLShift = true;
1485     }
1486     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1487       if (supports_sse4a()) {
1488         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1489       } else {
1490         UseXmmLoadAndClearUpper = false;
1491       }
1492     }
1493     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1494       if (supports_sse4a()) {
1495         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1496       } else {
1497         UseXmmRegToRegMoveAll = false;
1498       }
1499     }
1500     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1501       if (supports_sse4a()) {
1502         UseXmmI2F = true;
1503       } else {
1504         UseXmmI2F = false;
1505       }
1506     }
1507     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1508       if (supports_sse4a()) {
1509         UseXmmI2D = true;
1510       } else {
1511         UseXmmI2D = false;
1512       }
1513     }
1514     if (supports_sse4_2()) {
1515       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1516         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1517       }
1518     } else {
1519       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1520         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1521       }
1522       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1523     }
1524 
1525     // some defaults for AMD family 15h
1526     if (cpu_family() == 0x15) {
1527       // On family 15h processors default is no sw prefetch
1528       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1529         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1530       }
1531       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1532       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1533         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1534       }
1535       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1536       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1537         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1538       }
1539       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1540         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1541       }
1542     }
1543 
1544 #ifdef COMPILER2
1545     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1546       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1547       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1548     }
1549 #endif // COMPILER2
1550 
1551     // Some defaults for AMD family >= 17h && Hygon family 18h
1552     if (cpu_family() >= 0x17) {
1553       // On family >=17h processors use XMM and UnalignedLoadStores
1554       // for Array Copy
1555       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1556         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1557       }
1558       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1559         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1560       }
1561 #ifdef COMPILER2
1562       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1563         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1564       }
1565 #endif
1566     }
1567   }
1568 
1569   if (is_intel()) { // Intel cpus specific settings
1570     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1571       UseStoreImmI16 = false; // don't use it on Intel cpus
1572     }
1573     if (cpu_family() == 6 || cpu_family() == 15) {
1574       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1575         // Use it on all Intel cpus starting from PentiumPro
1576         UseAddressNop = true;
1577       }
1578     }
1579     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1580       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1581     }
1582     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1583       if (supports_sse3()) {
1584         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1585       } else {
1586         UseXmmRegToRegMoveAll = false;
1587       }
1588     }
1589     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1590 #ifdef COMPILER2
1591       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1592         // For new Intel cpus do the next optimization:
1593         // don't align the beginning of a loop if there are enough instructions
1594         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1595         // in current fetch line (OptoLoopAlignment) or the padding
1596         // is big (> MaxLoopPad).
1597         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1598         // generated NOP instructions. 11 is the largest size of one
1599         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1600         MaxLoopPad = 11;
1601       }
1602 #endif // COMPILER2
1603 
1604       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1605         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1606       }
1607       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1608         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1609           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1610         }
1611       }
1612       if (supports_sse4_2()) {
1613         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1614           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1615         }
1616       } else {
1617         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1618           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1619         }
1620         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1621       }
1622     }
1623     if (is_atom_family() || is_knights_family()) {
1624 #ifdef COMPILER2
1625       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1626         OptoScheduling = true;
1627       }
1628 #endif
1629       if (supports_sse4_2()) { // Silvermont
1630         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1631           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1632         }
1633       }
1634       if (FLAG_IS_DEFAULT(UseIncDec)) {
1635         FLAG_SET_DEFAULT(UseIncDec, false);
1636       }
1637     }
1638     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1639       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1640     }
1641 #ifdef COMPILER2
1642     if (UseAVX > 2) {
1643       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1644           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1645            ArrayOperationPartialInlineSize != 0 &&
1646            ArrayOperationPartialInlineSize != 16 &&
1647            ArrayOperationPartialInlineSize != 32 &&
1648            ArrayOperationPartialInlineSize != 64)) {
1649         int inline_size = 0;
1650         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1651           inline_size = 64;
1652         } else if (MaxVectorSize >= 32) {
1653           inline_size = 32;
1654         } else if (MaxVectorSize >= 16) {
1655           inline_size = 16;
1656         }
1657         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1658           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1659         }
1660         ArrayOperationPartialInlineSize = inline_size;
1661       }
1662 
1663       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1664         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1665         if (ArrayOperationPartialInlineSize) {
1666           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1667         } else {
1668           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1669         }
1670       }
1671     }
1672 #endif
1673   }
1674 
1675 #ifdef COMPILER2
1676   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1677     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1678       OptimizeFill = false;
1679     }
1680   }
1681 #endif
1682 
1683 #ifdef _LP64
1684   if (UseSSE42Intrinsics) {
1685     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1686       UseVectorizedMismatchIntrinsic = true;
1687     }
1688   } else if (UseVectorizedMismatchIntrinsic) {
1689     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1690       warning("vectorizedMismatch intrinsics are not available on this CPU");
1691     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1692   }
1693   if (UseAVX >= 2) {
1694     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1695   } else if (UseVectorizedHashCodeIntrinsic) {
1696     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1697       warning("vectorizedHashCode intrinsics are not available on this CPU");
1698     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1699   }
1700 #else
1701   if (UseVectorizedMismatchIntrinsic) {
1702     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1703       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1704     }
1705     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1706   }
1707   if (UseVectorizedHashCodeIntrinsic) {
1708     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1709       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1710     }
1711     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1712   }
1713 #endif // _LP64
1714 
1715   // Use count leading zeros count instruction if available.
1716   if (supports_lzcnt()) {
1717     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1718       UseCountLeadingZerosInstruction = true;
1719     }
1720    } else if (UseCountLeadingZerosInstruction) {
1721     warning("lzcnt instruction is not available on this CPU");
1722     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1723   }
1724 
1725   // Use count trailing zeros instruction if available
1726   if (supports_bmi1()) {
1727     // tzcnt does not require VEX prefix
1728     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1729       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1730         // Don't use tzcnt if BMI1 is switched off on command line.
1731         UseCountTrailingZerosInstruction = false;
1732       } else {
1733         UseCountTrailingZerosInstruction = true;
1734       }
1735     }
1736   } else if (UseCountTrailingZerosInstruction) {
1737     warning("tzcnt instruction is not available on this CPU");
1738     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1739   }
1740 
1741   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1742   // VEX prefix is generated only when AVX > 0.
1743   if (supports_bmi1() && supports_avx()) {
1744     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1745       UseBMI1Instructions = true;
1746     }
1747   } else if (UseBMI1Instructions) {
1748     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1749     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1750   }
1751 
1752   if (supports_bmi2() && supports_avx()) {
1753     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1754       UseBMI2Instructions = true;
1755     }
1756   } else if (UseBMI2Instructions) {
1757     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1758     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1759   }
1760 
1761   // Use population count instruction if available.
1762   if (supports_popcnt()) {
1763     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1764       UsePopCountInstruction = true;
1765     }
1766   } else if (UsePopCountInstruction) {
1767     warning("POPCNT instruction is not available on this CPU");
1768     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1769   }
1770 
1771   // Use fast-string operations if available.
1772   if (supports_erms()) {
1773     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1774       UseFastStosb = true;
1775     }
1776   } else if (UseFastStosb) {
1777     warning("fast-string operations are not available on this CPU");
1778     FLAG_SET_DEFAULT(UseFastStosb, false);
1779   }
1780 
1781   // For AMD Processors use XMM/YMM MOVDQU instructions
1782   // for Object Initialization as default
1783   if (is_amd() && cpu_family() >= 0x19) {
1784     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1785       UseFastStosb = false;
1786     }
1787   }
1788 
1789 #ifdef COMPILER2
1790   if (is_intel() && MaxVectorSize > 16) {
1791     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1792       UseFastStosb = false;
1793     }
1794   }
1795 #endif
1796 
1797   // Use XMM/YMM MOVDQU instruction for Object Initialization
1798   if (UseSSE >= 2 && UseUnalignedLoadStores) {
1799     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1800       UseXMMForObjInit = true;
1801     }
1802   } else if (UseXMMForObjInit) {
1803     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1804     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1805   }
1806 
1807 #ifdef COMPILER2
1808   if (FLAG_IS_DEFAULT(AlignVector)) {
1809     // Modern processors allow misaligned memory operations for vectors.
1810     AlignVector = !UseUnalignedLoadStores;
1811   }
1812 #endif // COMPILER2
1813 
1814   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1815     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1816       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1817     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1818       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1819     }
1820   }
1821 
1822   // Allocation prefetch settings
1823   int cache_line_size = checked_cast<int>(prefetch_data_size());
1824   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1825       (cache_line_size > AllocatePrefetchStepSize)) {
1826     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1827   }
1828 
1829   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1830     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1831     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1832       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1833     }
1834     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1835   }
1836 
1837   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1838     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1839     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1840   }
1841 
1842   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1843     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1844         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1845       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1846     }
1847 #ifdef COMPILER2
1848     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1849       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1850     }
1851 #endif
1852   }
1853 
1854   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1855 #ifdef COMPILER2
1856     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1857       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1858     }
1859 #endif
1860   }
1861 
1862 #ifdef _LP64
1863   // Prefetch settings
1864 
1865   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1866   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1867   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1868   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1869 
1870   // gc copy/scan is disabled if prefetchw isn't supported, because
1871   // Prefetch::write emits an inlined prefetchw on Linux.
1872   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1873   // The used prefetcht0 instruction works for both amd64 and em64t.
1874 
1875   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1876     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1877   }
1878   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1879     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1880   }
1881 #endif
1882 
1883   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1884      (cache_line_size > ContendedPaddingWidth))
1885      ContendedPaddingWidth = cache_line_size;
1886 
1887   // This machine allows unaligned memory accesses
1888   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1889     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1890   }
1891 
1892 #ifndef PRODUCT
1893   if (log_is_enabled(Info, os, cpu)) {
1894     LogStream ls(Log(os, cpu)::info());
1895     outputStream* log = &ls;
1896     log->print_cr("Logical CPUs per core: %u",
1897                   logical_processors_per_package());
1898     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1899     log->print("UseSSE=%d", UseSSE);
1900     if (UseAVX > 0) {
1901       log->print("  UseAVX=%d", UseAVX);
1902     }
1903     if (UseAES) {
1904       log->print("  UseAES=1");
1905     }
1906 #ifdef COMPILER2
1907     if (MaxVectorSize > 0) {
1908       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1909     }
1910 #endif
1911     log->cr();
1912     log->print("Allocation");
1913     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1914       log->print_cr(": no prefetching");
1915     } else {
1916       log->print(" prefetching: ");
1917       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1918         log->print("PREFETCHW");
1919       } else if (UseSSE >= 1) {
1920         if (AllocatePrefetchInstr == 0) {
1921           log->print("PREFETCHNTA");
1922         } else if (AllocatePrefetchInstr == 1) {
1923           log->print("PREFETCHT0");
1924         } else if (AllocatePrefetchInstr == 2) {
1925           log->print("PREFETCHT2");
1926         } else if (AllocatePrefetchInstr == 3) {
1927           log->print("PREFETCHW");
1928         }
1929       }
1930       if (AllocatePrefetchLines > 1) {
1931         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1932       } else {
1933         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1934       }
1935     }
1936 
1937     if (PrefetchCopyIntervalInBytes > 0) {
1938       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1939     }
1940     if (PrefetchScanIntervalInBytes > 0) {
1941       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1942     }
1943     if (ContendedPaddingWidth > 0) {
1944       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1945     }
1946   }
1947 #endif // !PRODUCT
1948   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1949       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1950   }
1951   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1952       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1953   }
1954 }
1955 
1956 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1957   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1958   if (vrt == XenHVM) {
1959     st->print_cr("Xen hardware-assisted virtualization detected");
1960   } else if (vrt == KVM) {
1961     st->print_cr("KVM virtualization detected");
1962   } else if (vrt == VMWare) {
1963     st->print_cr("VMWare virtualization detected");
1964     VirtualizationSupport::print_virtualization_info(st);
1965   } else if (vrt == HyperV) {
1966     st->print_cr("Hyper-V virtualization detected");
1967   } else if (vrt == HyperVRole) {
1968     st->print_cr("Hyper-V role detected");
1969   }
1970 }
1971 
1972 bool VM_Version::compute_has_intel_jcc_erratum() {
1973   if (!is_intel_family_core()) {
1974     // Only Intel CPUs are affected.
1975     return false;
1976   }
1977   // The following table of affected CPUs is based on the following document released by Intel:
1978   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1979   switch (_model) {
1980   case 0x8E:
1981     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1982     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1983     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1984     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1985     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1986     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1987     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1988     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1989     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1990     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1991   case 0x4E:
1992     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1993     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1994     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1995     return _stepping == 0x3;
1996   case 0x55:
1997     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1998     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1999     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2000     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2001     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2002     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2003     return _stepping == 0x4 || _stepping == 0x7;
2004   case 0x5E:
2005     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2006     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2007     return _stepping == 0x3;
2008   case 0x9E:
2009     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2010     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2011     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2012     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2013     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2014     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2015     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2016     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2017     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2018     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2019     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2020     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2021     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2022     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2023     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2024   case 0xA5:
2025     // Not in Intel documentation.
2026     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2027     return true;
2028   case 0xA6:
2029     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2030     return _stepping == 0x0;
2031   case 0xAE:
2032     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2033     return _stepping == 0xA;
2034   default:
2035     // If we are running on another intel machine not recognized in the table, we are okay.
2036     return false;
2037   }
2038 }
2039 
2040 // On Xen, the cpuid instruction returns
2041 //  eax / registers[0]: Version of Xen
2042 //  ebx / registers[1]: chars 'XenV'
2043 //  ecx / registers[2]: chars 'MMXe'
2044 //  edx / registers[3]: chars 'nVMM'
2045 //
2046 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2047 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2048 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2049 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2050 //
2051 // more information :
2052 // https://kb.vmware.com/s/article/1009458
2053 //
2054 void VM_Version::check_virtualizations() {
2055   uint32_t registers[4] = {0};
2056   char signature[13] = {0};
2057 
2058   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2059   // from 0x40000000 until 0x40010000.
2060   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2061   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2062     detect_virt_stub(leaf, registers);
2063     memcpy(signature, &registers[1], 12);
2064 
2065     if (strncmp("VMwareVMware", signature, 12) == 0) {
2066       Abstract_VM_Version::_detected_virtualization = VMWare;
2067       // check for extended metrics from guestlib
2068       VirtualizationSupport::initialize();
2069     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2070       Abstract_VM_Version::_detected_virtualization = HyperV;
2071 #ifdef _WINDOWS
2072       // CPUID leaf 0x40000007 is available to the root partition only.
2073       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2074       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2075       detect_virt_stub(0x40000007, registers);
2076       if ((registers[0] != 0x0) ||
2077           (registers[1] != 0x0) ||
2078           (registers[2] != 0x0) ||
2079           (registers[3] != 0x0)) {
2080         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2081       }
2082 #endif
2083     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2084       Abstract_VM_Version::_detected_virtualization = KVM;
2085     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2086       Abstract_VM_Version::_detected_virtualization = XenHVM;
2087     }
2088   }
2089 }
2090 
2091 #ifdef COMPILER2
2092 // Determine if it's running on Cascade Lake using default options.
2093 bool VM_Version::is_default_intel_cascade_lake() {
2094   return FLAG_IS_DEFAULT(UseAVX) &&
2095          FLAG_IS_DEFAULT(MaxVectorSize) &&
2096          UseAVX > 2 &&
2097          is_intel_cascade_lake();
2098 }
2099 #endif
2100 
2101 bool VM_Version::is_intel_cascade_lake() {
2102   return is_intel_skylake() && _stepping >= 5;
2103 }
2104 
2105 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2106 // for implementing the array copy and clear operations.
2107 // The Intel platforms that supports the serialize instruction
2108 // has improved implementation of 64-byte load/stores and so the default
2109 // threshold is set to 0 for these platforms.
2110 int VM_Version::avx3_threshold() {
2111   return (is_intel_family_core() &&
2112           supports_serialize() &&
2113           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2114 }
2115 
2116 static bool _vm_version_initialized = false;
2117 
2118 void VM_Version::initialize() {
2119   ResourceMark rm;
2120   // Making this stub must be FIRST use of assembler
2121   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2122   if (stub_blob == nullptr) {
2123     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2124   }
2125   CodeBuffer c(stub_blob);
2126   VM_Version_StubGenerator g(&c);
2127 
2128   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2129                                      g.generate_get_cpu_info());
2130   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2131                                      g.generate_detect_virt());
2132 
2133   get_processor_features();
2134 
2135   LP64_ONLY(Assembler::precompute_instructions();)
2136 
2137   if (VM_Version::supports_hv()) { // Supports hypervisor
2138     check_virtualizations();
2139   }
2140   _vm_version_initialized = true;
2141 }
2142 
2143 typedef enum {
2144    CPU_FAMILY_8086_8088  = 0,
2145    CPU_FAMILY_INTEL_286  = 2,
2146    CPU_FAMILY_INTEL_386  = 3,
2147    CPU_FAMILY_INTEL_486  = 4,
2148    CPU_FAMILY_PENTIUM    = 5,
2149    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2150    CPU_FAMILY_PENTIUM_4  = 0xF
2151 } FamilyFlag;
2152 
2153 typedef enum {
2154   RDTSCP_FLAG  = 0x08000000, // bit 27
2155   INTEL64_FLAG = 0x20000000  // bit 29
2156 } _featureExtendedEdxFlag;
2157 
2158 typedef enum {
2159    FPU_FLAG     = 0x00000001,
2160    VME_FLAG     = 0x00000002,
2161    DE_FLAG      = 0x00000004,
2162    PSE_FLAG     = 0x00000008,
2163    TSC_FLAG     = 0x00000010,
2164    MSR_FLAG     = 0x00000020,
2165    PAE_FLAG     = 0x00000040,
2166    MCE_FLAG     = 0x00000080,
2167    CX8_FLAG     = 0x00000100,
2168    APIC_FLAG    = 0x00000200,
2169    SEP_FLAG     = 0x00000800,
2170    MTRR_FLAG    = 0x00001000,
2171    PGE_FLAG     = 0x00002000,
2172    MCA_FLAG     = 0x00004000,
2173    CMOV_FLAG    = 0x00008000,
2174    PAT_FLAG     = 0x00010000,
2175    PSE36_FLAG   = 0x00020000,
2176    PSNUM_FLAG   = 0x00040000,
2177    CLFLUSH_FLAG = 0x00080000,
2178    DTS_FLAG     = 0x00200000,
2179    ACPI_FLAG    = 0x00400000,
2180    MMX_FLAG     = 0x00800000,
2181    FXSR_FLAG    = 0x01000000,
2182    SSE_FLAG     = 0x02000000,
2183    SSE2_FLAG    = 0x04000000,
2184    SS_FLAG      = 0x08000000,
2185    HTT_FLAG     = 0x10000000,
2186    TM_FLAG      = 0x20000000
2187 } FeatureEdxFlag;
2188 
2189 static BufferBlob* cpuid_brand_string_stub_blob;
2190 static const int   cpuid_brand_string_stub_size = 550;
2191 
2192 extern "C" {
2193   typedef void (*getCPUIDBrandString_stub_t)(void*);
2194 }
2195 
2196 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2197 
2198 // VM_Version statics
2199 enum {
2200   ExtendedFamilyIdLength_INTEL = 16,
2201   ExtendedFamilyIdLength_AMD   = 24
2202 };
2203 
2204 const size_t VENDOR_LENGTH = 13;
2205 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2206 static char* _cpu_brand_string = nullptr;
2207 static int64_t _max_qualified_cpu_frequency = 0;
2208 
2209 static int _no_of_threads = 0;
2210 static int _no_of_cores = 0;
2211 
2212 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2213   "8086/8088",
2214   "",
2215   "286",
2216   "386",
2217   "486",
2218   "Pentium",
2219   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2220   "",
2221   "",
2222   "",
2223   "",
2224   "",
2225   "",
2226   "",
2227   "",
2228   "Pentium 4"
2229 };
2230 
2231 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2232   "",
2233   "",
2234   "",
2235   "",
2236   "5x86",
2237   "K5/K6",
2238   "Athlon/AthlonXP",
2239   "",
2240   "",
2241   "",
2242   "",
2243   "",
2244   "",
2245   "",
2246   "",
2247   "Opteron/Athlon64",
2248   "Opteron QC/Phenom",  // Barcelona et.al.
2249   "",
2250   "",
2251   "",
2252   "",
2253   "",
2254   "",
2255   "Zen"
2256 };
2257 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2258 // September 2013, Vol 3C Table 35-1
2259 const char* const _model_id_pentium_pro[] = {
2260   "",
2261   "Pentium Pro",
2262   "",
2263   "Pentium II model 3",
2264   "",
2265   "Pentium II model 5/Xeon/Celeron",
2266   "Celeron",
2267   "Pentium III/Pentium III Xeon",
2268   "Pentium III/Pentium III Xeon",
2269   "Pentium M model 9",    // Yonah
2270   "Pentium III, model A",
2271   "Pentium III, model B",
2272   "",
2273   "Pentium M model D",    // Dothan
2274   "",
2275   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2276   "",
2277   "",
2278   "",
2279   "",
2280   "",
2281   "",
2282   "Celeron",              // 0x16 Celeron 65nm
2283   "Core 2",               // 0x17 Penryn / Harpertown
2284   "",
2285   "",
2286   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2287   "Atom",                 // 0x1B Z5xx series Silverthorn
2288   "",
2289   "Core 2",               // 0x1D Dunnington (6-core)
2290   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2291   "",
2292   "",
2293   "",
2294   "",
2295   "",
2296   "",
2297   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2298   "",
2299   "",
2300   "",                     // 0x28
2301   "",
2302   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2303   "",
2304   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2305   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2306   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2307   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2308   "",
2309   "",
2310   "",
2311   "",
2312   "",
2313   "",
2314   "",
2315   "",
2316   "",
2317   "",
2318   "Ivy Bridge",           // 0x3a
2319   "",
2320   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2321   "",                     // 0x3d "Next Generation Intel Core Processor"
2322   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2323   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2324   "",
2325   "",
2326   "",
2327   "",
2328   "",
2329   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2330   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2331   nullptr
2332 };
2333 
2334 /* Brand ID is for back compatibility
2335  * Newer CPUs uses the extended brand string */
2336 const char* const _brand_id[] = {
2337   "",
2338   "Celeron processor",
2339   "Pentium III processor",
2340   "Intel Pentium III Xeon processor",
2341   "",
2342   "",
2343   "",
2344   "",
2345   "Intel Pentium 4 processor",
2346   nullptr
2347 };
2348 
2349 
2350 const char* const _feature_edx_id[] = {
2351   "On-Chip FPU",
2352   "Virtual Mode Extensions",
2353   "Debugging Extensions",
2354   "Page Size Extensions",
2355   "Time Stamp Counter",
2356   "Model Specific Registers",
2357   "Physical Address Extension",
2358   "Machine Check Exceptions",
2359   "CMPXCHG8B Instruction",
2360   "On-Chip APIC",
2361   "",
2362   "Fast System Call",
2363   "Memory Type Range Registers",
2364   "Page Global Enable",
2365   "Machine Check Architecture",
2366   "Conditional Mov Instruction",
2367   "Page Attribute Table",
2368   "36-bit Page Size Extension",
2369   "Processor Serial Number",
2370   "CLFLUSH Instruction",
2371   "",
2372   "Debug Trace Store feature",
2373   "ACPI registers in MSR space",
2374   "Intel Architecture MMX Technology",
2375   "Fast Float Point Save and Restore",
2376   "Streaming SIMD extensions",
2377   "Streaming SIMD extensions 2",
2378   "Self-Snoop",
2379   "Hyper Threading",
2380   "Thermal Monitor",
2381   "",
2382   "Pending Break Enable"
2383 };
2384 
2385 const char* const _feature_extended_edx_id[] = {
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "",
2392   "",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "SYSCALL/SYSRET",
2398   "",
2399   "",
2400   "",
2401   "",
2402   "",
2403   "",
2404   "",
2405   "",
2406   "Execute Disable Bit",
2407   "",
2408   "",
2409   "",
2410   "",
2411   "",
2412   "",
2413   "RDTSCP",
2414   "",
2415   "Intel 64 Architecture",
2416   "",
2417   ""
2418 };
2419 
2420 const char* const _feature_ecx_id[] = {
2421   "Streaming SIMD Extensions 3",
2422   "PCLMULQDQ",
2423   "64-bit DS Area",
2424   "MONITOR/MWAIT instructions",
2425   "CPL Qualified Debug Store",
2426   "Virtual Machine Extensions",
2427   "Safer Mode Extensions",
2428   "Enhanced Intel SpeedStep technology",
2429   "Thermal Monitor 2",
2430   "Supplemental Streaming SIMD Extensions 3",
2431   "L1 Context ID",
2432   "",
2433   "Fused Multiply-Add",
2434   "CMPXCHG16B",
2435   "xTPR Update Control",
2436   "Perfmon and Debug Capability",
2437   "",
2438   "Process-context identifiers",
2439   "Direct Cache Access",
2440   "Streaming SIMD extensions 4.1",
2441   "Streaming SIMD extensions 4.2",
2442   "x2APIC",
2443   "MOVBE",
2444   "Popcount instruction",
2445   "TSC-Deadline",
2446   "AESNI",
2447   "XSAVE",
2448   "OSXSAVE",
2449   "AVX",
2450   "F16C",
2451   "RDRAND",
2452   ""
2453 };
2454 
2455 const char* const _feature_extended_ecx_id[] = {
2456   "LAHF/SAHF instruction support",
2457   "Core multi-processor legacy mode",
2458   "",
2459   "",
2460   "",
2461   "Advanced Bit Manipulations: LZCNT",
2462   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2463   "Misaligned SSE mode",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   "",
2484   "",
2485   "",
2486   "",
2487   ""
2488 };
2489 
2490 void VM_Version::initialize_tsc(void) {
2491   ResourceMark rm;
2492 
2493   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2494   if (cpuid_brand_string_stub_blob == nullptr) {
2495     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2496   }
2497   CodeBuffer c(cpuid_brand_string_stub_blob);
2498   VM_Version_StubGenerator g(&c);
2499   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2500                                    g.generate_getCPUIDBrandString());
2501 }
2502 
2503 const char* VM_Version::cpu_model_description(void) {
2504   uint32_t cpu_family = extended_cpu_family();
2505   uint32_t cpu_model = extended_cpu_model();
2506   const char* model = nullptr;
2507 
2508   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2509     for (uint32_t i = 0; i <= cpu_model; i++) {
2510       model = _model_id_pentium_pro[i];
2511       if (model == nullptr) {
2512         break;
2513       }
2514     }
2515   }
2516   return model;
2517 }
2518 
2519 const char* VM_Version::cpu_brand_string(void) {
2520   if (_cpu_brand_string == nullptr) {
2521     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2522     if (nullptr == _cpu_brand_string) {
2523       return nullptr;
2524     }
2525     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2526     if (ret_val != OS_OK) {
2527       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2528       _cpu_brand_string = nullptr;
2529     }
2530   }
2531   return _cpu_brand_string;
2532 }
2533 
2534 const char* VM_Version::cpu_brand(void) {
2535   const char*  brand  = nullptr;
2536 
2537   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2538     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2539     brand = _brand_id[0];
2540     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2541       brand = _brand_id[i];
2542     }
2543   }
2544   return brand;
2545 }
2546 
2547 bool VM_Version::cpu_is_em64t(void) {
2548   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2549 }
2550 
2551 bool VM_Version::is_netburst(void) {
2552   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2553 }
2554 
2555 bool VM_Version::supports_tscinv_ext(void) {
2556   if (!supports_tscinv_bit()) {
2557     return false;
2558   }
2559 
2560   if (is_intel()) {
2561     return true;
2562   }
2563 
2564   if (is_amd()) {
2565     return !is_amd_Barcelona();
2566   }
2567 
2568   if (is_hygon()) {
2569     return true;
2570   }
2571 
2572   return false;
2573 }
2574 
2575 void VM_Version::resolve_cpu_information_details(void) {
2576 
2577   // in future we want to base this information on proper cpu
2578   // and cache topology enumeration such as:
2579   // Intel 64 Architecture Processor Topology Enumeration
2580   // which supports system cpu and cache topology enumeration
2581   // either using 2xAPICIDs or initial APICIDs
2582 
2583   // currently only rough cpu information estimates
2584   // which will not necessarily reflect the exact configuration of the system
2585 
2586   // this is the number of logical hardware threads
2587   // visible to the operating system
2588   _no_of_threads = os::processor_count();
2589 
2590   // find out number of threads per cpu package
2591   int threads_per_package = threads_per_core() * cores_per_cpu();
2592 
2593   // use amount of threads visible to the process in order to guess number of sockets
2594   _no_of_sockets = _no_of_threads / threads_per_package;
2595 
2596   // process might only see a subset of the total number of threads
2597   // from a single processor package. Virtualization/resource management for example.
2598   // If so then just write a hard 1 as num of pkgs.
2599   if (0 == _no_of_sockets) {
2600     _no_of_sockets = 1;
2601   }
2602 
2603   // estimate the number of cores
2604   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2605 }
2606 
2607 
2608 const char* VM_Version::cpu_family_description(void) {
2609   int cpu_family_id = extended_cpu_family();
2610   if (is_amd()) {
2611     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2612       return _family_id_amd[cpu_family_id];
2613     }
2614   }
2615   if (is_intel()) {
2616     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2617       return cpu_model_description();
2618     }
2619     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2620       return _family_id_intel[cpu_family_id];
2621     }
2622   }
2623   if (is_hygon()) {
2624     return "Dhyana";
2625   }
2626   return "Unknown x86";
2627 }
2628 
2629 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2630   assert(buf != nullptr, "buffer is null!");
2631   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2632 
2633   const char* cpu_type = nullptr;
2634   const char* x64 = nullptr;
2635 
2636   if (is_intel()) {
2637     cpu_type = "Intel";
2638     x64 = cpu_is_em64t() ? " Intel64" : "";
2639   } else if (is_amd()) {
2640     cpu_type = "AMD";
2641     x64 = cpu_is_em64t() ? " AMD64" : "";
2642   } else if (is_hygon()) {
2643     cpu_type = "Hygon";
2644     x64 = cpu_is_em64t() ? " AMD64" : "";
2645   } else {
2646     cpu_type = "Unknown x86";
2647     x64 = cpu_is_em64t() ? " x86_64" : "";
2648   }
2649 
2650   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2651     cpu_type,
2652     cpu_family_description(),
2653     supports_ht() ? " (HT)" : "",
2654     supports_sse3() ? " SSE3" : "",
2655     supports_ssse3() ? " SSSE3" : "",
2656     supports_sse4_1() ? " SSE4.1" : "",
2657     supports_sse4_2() ? " SSE4.2" : "",
2658     supports_sse4a() ? " SSE4A" : "",
2659     is_netburst() ? " Netburst" : "",
2660     is_intel_family_core() ? " Core" : "",
2661     x64);
2662 
2663   return OS_OK;
2664 }
2665 
2666 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2667   assert(buf != nullptr, "buffer is null!");
2668   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2669   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2670 
2671   // invoke newly generated asm code to fetch CPU Brand String
2672   getCPUIDBrandString_stub(&_cpuid_info);
2673 
2674   // fetch results into buffer
2675   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2676   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2677   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2678   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2679   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2680   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2681   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2682   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2683   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2684   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2685   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2686   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2687 
2688   return OS_OK;
2689 }
2690 
2691 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2692   guarantee(buf != nullptr, "buffer is null!");
2693   guarantee(buf_len > 0, "buffer len not enough!");
2694 
2695   unsigned int flag = 0;
2696   unsigned int fi = 0;
2697   size_t       written = 0;
2698   const char*  prefix = "";
2699 
2700 #define WRITE_TO_BUF(string)                                                          \
2701   {                                                                                   \
2702     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2703     if (res < 0) {                                                                    \
2704       return buf_len - 1;                                                             \
2705     }                                                                                 \
2706     written += res;                                                                   \
2707     if (prefix[0] == '\0') {                                                          \
2708       prefix = ", ";                                                                  \
2709     }                                                                                 \
2710   }
2711 
2712   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2713     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2714       continue; /* no hyperthreading */
2715     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2716       continue; /* no fast system call */
2717     }
2718     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2719       WRITE_TO_BUF(_feature_edx_id[fi]);
2720     }
2721   }
2722 
2723   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2724     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2725       WRITE_TO_BUF(_feature_ecx_id[fi]);
2726     }
2727   }
2728 
2729   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2730     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2731       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2732     }
2733   }
2734 
2735   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2736     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2737       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2738     }
2739   }
2740 
2741   if (supports_tscinv_bit()) {
2742       WRITE_TO_BUF("Invariant TSC");
2743   }
2744 
2745   return written;
2746 }
2747 
2748 /**
2749  * Write a detailed description of the cpu to a given buffer, including
2750  * feature set.
2751  */
2752 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2753   assert(buf != nullptr, "buffer is null!");
2754   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2755 
2756   static const char* unknown = "<unknown>";
2757   char               vendor_id[VENDOR_LENGTH];
2758   const char*        family = nullptr;
2759   const char*        model = nullptr;
2760   const char*        brand = nullptr;
2761   int                outputLen = 0;
2762 
2763   family = cpu_family_description();
2764   if (family == nullptr) {
2765     family = unknown;
2766   }
2767 
2768   model = cpu_model_description();
2769   if (model == nullptr) {
2770     model = unknown;
2771   }
2772 
2773   brand = cpu_brand_string();
2774 
2775   if (brand == nullptr) {
2776     brand = cpu_brand();
2777     if (brand == nullptr) {
2778       brand = unknown;
2779     }
2780   }
2781 
2782   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2783   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2784   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2785   vendor_id[VENDOR_LENGTH-1] = '\0';
2786 
2787   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2788     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2789     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2790     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2791     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2792     "Supports: ",
2793     brand,
2794     vendor_id,
2795     family,
2796     extended_cpu_family(),
2797     model,
2798     extended_cpu_model(),
2799     cpu_stepping(),
2800     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2801     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2802     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2803     _cpuid_info.std_cpuid1_eax.value,
2804     _cpuid_info.std_cpuid1_ebx.value,
2805     _cpuid_info.std_cpuid1_ecx.value,
2806     _cpuid_info.std_cpuid1_edx.value,
2807     _cpuid_info.ext_cpuid1_eax,
2808     _cpuid_info.ext_cpuid1_ebx,
2809     _cpuid_info.ext_cpuid1_ecx,
2810     _cpuid_info.ext_cpuid1_edx);
2811 
2812   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2813     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2814     return OS_ERR;
2815   }
2816 
2817   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2818 
2819   return OS_OK;
2820 }
2821 
2822 
2823 // Fill in Abstract_VM_Version statics
2824 void VM_Version::initialize_cpu_information() {
2825   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2826   assert(!_initialized, "shouldn't be initialized yet");
2827   resolve_cpu_information_details();
2828 
2829   // initialize cpu_name and cpu_desc
2830   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2831   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2832   _initialized = true;
2833 }
2834 
2835 /**
2836  *  For information about extracting the frequency from the cpu brand string, please see:
2837  *
2838  *    Intel Processor Identification and the CPUID Instruction
2839  *    Application Note 485
2840  *    May 2012
2841  *
2842  * The return value is the frequency in Hz.
2843  */
2844 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2845   const char* const brand_string = cpu_brand_string();
2846   if (brand_string == nullptr) {
2847     return 0;
2848   }
2849   const int64_t MEGA = 1000000;
2850   int64_t multiplier = 0;
2851   int64_t frequency = 0;
2852   uint8_t idx = 0;
2853   // The brand string buffer is at most 48 bytes.
2854   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2855   for (; idx < 48-2; ++idx) {
2856     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2857     // Search brand string for "yHz" where y is M, G, or T.
2858     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2859       if (brand_string[idx] == 'M') {
2860         multiplier = MEGA;
2861       } else if (brand_string[idx] == 'G') {
2862         multiplier = MEGA * 1000;
2863       } else if (brand_string[idx] == 'T') {
2864         multiplier = MEGA * MEGA;
2865       }
2866       break;
2867     }
2868   }
2869   if (multiplier > 0) {
2870     // Compute frequency (in Hz) from brand string.
2871     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2872       frequency =  (brand_string[idx-4] - '0') * multiplier;
2873       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2874       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2875     } else { // format is "xxxx"
2876       frequency =  (brand_string[idx-4] - '0') * 1000;
2877       frequency += (brand_string[idx-3] - '0') * 100;
2878       frequency += (brand_string[idx-2] - '0') * 10;
2879       frequency += (brand_string[idx-1] - '0');
2880       frequency *= multiplier;
2881     }
2882   }
2883   return frequency;
2884 }
2885 
2886 
2887 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2888   if (_max_qualified_cpu_frequency == 0) {
2889     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2890   }
2891   return _max_qualified_cpu_frequency;
2892 }
2893 
2894 uint64_t VM_Version::feature_flags() {
2895   uint64_t result = 0;
2896   if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
2897     result |= CPU_CX8;
2898   if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
2899     result |= CPU_CMOV;
2900   if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
2901     result |= CPU_FLUSH;
2902 #ifdef _LP64
2903   // clflush should always be available on x86_64
2904   // if not we are in real trouble because we rely on it
2905   // to flush the code cache.
2906   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2907 #endif
2908   if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2909       _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
2910     result |= CPU_FXSR;
2911   // HT flag is set for multi-core processors also.
2912   if (threads_per_core() > 1)
2913     result |= CPU_HT;
2914   if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2915       _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
2916     result |= CPU_MMX;
2917   if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
2918     result |= CPU_SSE;
2919   if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
2920     result |= CPU_SSE2;
2921   if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
2922     result |= CPU_SSE3;
2923   if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
2924     result |= CPU_SSSE3;
2925   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
2926     result |= CPU_SSE4_1;
2927   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
2928     result |= CPU_SSE4_2;
2929   if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
2930     result |= CPU_POPCNT;
2931   if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
2932       _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
2933       _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
2934       _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
2935     result |= CPU_AVX;
2936     result |= CPU_VZEROUPPER;
2937     if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
2938       result |= CPU_F16C;
2939     if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
2940       result |= CPU_AVX2;
2941     if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
2942         _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
2943         _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
2944         _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
2945       result |= CPU_AVX512F;
2946       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
2947         result |= CPU_AVX512CD;
2948       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
2949         result |= CPU_AVX512DQ;
2950       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0)
2951         result |= CPU_AVX512_IFMA;
2952       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
2953         result |= CPU_AVX512PF;
2954       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
2955         result |= CPU_AVX512ER;
2956       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
2957         result |= CPU_AVX512BW;
2958       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
2959         result |= CPU_AVX512VL;
2960       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2961         result |= CPU_AVX512_VPOPCNTDQ;
2962       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2963         result |= CPU_AVX512_VPCLMULQDQ;
2964       if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
2965         result |= CPU_AVX512_VAES;
2966       if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
2967         result |= CPU_GFNI;
2968       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
2969         result |= CPU_AVX512_VNNI;
2970       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2971         result |= CPU_AVX512_BITALG;
2972       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2973         result |= CPU_AVX512_VBMI;
2974       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2975         result |= CPU_AVX512_VBMI2;
2976     }
2977   }
2978   if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
2979     result |= CPU_HV;
2980   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
2981     result |= CPU_BMI1;
2982   if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
2983     result |= CPU_TSC;
2984   if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
2985     result |= CPU_TSCINV_BIT;
2986   if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
2987     result |= CPU_AES;
2988   if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
2989     result |= CPU_ERMS;
2990   if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2991     result |= CPU_FSRM;
2992   if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
2993     result |= CPU_CLMUL;
2994   if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
2995     result |= CPU_RTM;
2996   if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
2997      result |= CPU_ADX;
2998   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
2999     result |= CPU_BMI2;
3000   if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
3001     result |= CPU_SHA;
3002   if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
3003     result |= CPU_FMA;
3004   if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
3005     result |= CPU_FLUSHOPT;
3006   if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
3007     result |= CPU_RDTSCP;
3008   if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
3009     result |= CPU_RDPID;
3010 
3011   // AMD|Hygon features.
3012   if (is_amd_family()) {
3013     if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
3014         (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
3015       result |= CPU_3DNOW_PREFETCH;
3016     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
3017       result |= CPU_LZCNT;
3018     if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
3019       result |= CPU_SSE4A;
3020   }
3021 
3022   // Intel features.
3023   if (is_intel()) {
3024     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3025       result |= CPU_LZCNT;
3026     }
3027     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3028       result |= CPU_3DNOW_PREFETCH;
3029     }
3030     if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
3031       result |= CPU_CLWB;
3032     }
3033     if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
3034       result |= CPU_SERIALIZE;
3035   }
3036 
3037   // ZX features.
3038   if (is_zx()) {
3039     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3040       result |= CPU_LZCNT;
3041     }
3042     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3043       result |= CPU_3DNOW_PREFETCH;
3044     }
3045   }
3046 
3047   // Protection key features.
3048   if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) {
3049     result |= CPU_PKU;
3050   }
3051   if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) {
3052     result |= CPU_OSPKE;
3053   }
3054 
3055   // Control flow enforcement (CET) features.
3056   if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) {
3057     result |= CPU_CET_SS;
3058   }
3059   if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) {
3060     result |= CPU_CET_IBT;
3061   }
3062 
3063   // Composite features.
3064   if (supports_tscinv_bit() &&
3065       ((is_amd_family() && !is_amd_Barcelona()) ||
3066        is_intel_tsc_synched_at_init())) {
3067     result |= CPU_TSCINV;
3068   }
3069 
3070   return result;
3071 }
3072 
3073 bool VM_Version::os_supports_avx_vectors() {
3074   bool retVal = false;
3075   int nreg = 2 LP64_ONLY(+2);
3076   if (supports_evex()) {
3077     // Verify that OS save/restore all bits of EVEX registers
3078     // during signal processing.
3079     retVal = true;
3080     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3081       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3082         retVal = false;
3083         break;
3084       }
3085     }
3086   } else if (supports_avx()) {
3087     // Verify that OS save/restore all bits of AVX registers
3088     // during signal processing.
3089     retVal = true;
3090     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3091       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3092         retVal = false;
3093         break;
3094       }
3095     }
3096     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3097     if (retVal == false) {
3098       // Verify that OS save/restore all bits of EVEX registers
3099       // during signal processing.
3100       retVal = true;
3101       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3102         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3103           retVal = false;
3104           break;
3105         }
3106       }
3107     }
3108   }
3109   return retVal;
3110 }
3111 
3112 uint VM_Version::cores_per_cpu() {
3113   uint result = 1;
3114   if (is_intel()) {
3115     bool supports_topology = supports_processor_topology();
3116     if (supports_topology) {
3117       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3118                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3119     }
3120     if (!supports_topology || result == 0) {
3121       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3122     }
3123   } else if (is_amd_family()) {
3124     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3125   } else if (is_zx()) {
3126     bool supports_topology = supports_processor_topology();
3127     if (supports_topology) {
3128       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3129                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3130     }
3131     if (!supports_topology || result == 0) {
3132       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3133     }
3134   }
3135   return result;
3136 }
3137 
3138 uint VM_Version::threads_per_core() {
3139   uint result = 1;
3140   if (is_intel() && supports_processor_topology()) {
3141     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3142   } else if (is_zx() && supports_processor_topology()) {
3143     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3144   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3145     if (cpu_family() >= 0x17) {
3146       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3147     } else {
3148       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3149                  cores_per_cpu();
3150     }
3151   }
3152   return (result == 0 ? 1 : result);
3153 }
3154 
3155 uint VM_Version::L1_line_size() {
3156   uint result = 0;
3157   if (is_intel()) {
3158     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3159   } else if (is_amd_family()) {
3160     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3161   } else if (is_zx()) {
3162     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3163   }
3164   if (result < 32) // not defined ?
3165     result = 32;   // 32 bytes by default on x86 and other x64
3166   return result;
3167 }
3168 
3169 bool VM_Version::is_intel_tsc_synched_at_init() {
3170   if (is_intel_family_core()) {
3171     uint32_t ext_model = extended_cpu_model();
3172     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3173         ext_model == CPU_MODEL_WESTMERE_EP    ||
3174         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3175         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3176       // <= 2-socket invariant tsc support. EX versions are usually used
3177       // in > 2-socket systems and likely don't synchronize tscs at
3178       // initialization.
3179       // Code that uses tsc values must be prepared for them to arbitrarily
3180       // jump forward or backward.
3181       return true;
3182     }
3183   }
3184   return false;
3185 }
3186 
3187 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3188   // Hardware prefetching (distance/size in bytes):
3189   // Pentium 3 -  64 /  32
3190   // Pentium 4 - 256 / 128
3191   // Athlon    -  64 /  32 ????
3192   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3193   // Core      - 128 /  64
3194   //
3195   // Software prefetching (distance in bytes / instruction with best score):
3196   // Pentium 3 - 128 / prefetchnta
3197   // Pentium 4 - 512 / prefetchnta
3198   // Athlon    - 128 / prefetchnta
3199   // Opteron   - 256 / prefetchnta
3200   // Core      - 256 / prefetchnta
3201   // It will be used only when AllocatePrefetchStyle > 0
3202 
3203   if (is_amd_family()) { // AMD | Hygon
3204     if (supports_sse2()) {
3205       return 256; // Opteron
3206     } else {
3207       return 128; // Athlon
3208     }
3209   } else { // Intel
3210     if (supports_sse3() && cpu_family() == 6) {
3211       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3212         return 192;
3213       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3214 #ifdef _LP64
3215         return 384;
3216 #else
3217         return 320;
3218 #endif
3219       }
3220     }
3221     if (supports_sse2()) {
3222       if (cpu_family() == 6) {
3223         return 256; // Pentium M, Core, Core2
3224       } else {
3225         return 512; // Pentium 4
3226       }
3227     } else {
3228       return 128; // Pentium 3 (and all other old CPUs)
3229     }
3230   }
3231 }
3232 
3233 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3234   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3235   switch (id) {
3236   case vmIntrinsics::_floatToFloat16:
3237   case vmIntrinsics::_float16ToFloat:
3238     if (!supports_float16()) {
3239       return false;
3240     }
3241     break;
3242   default:
3243     break;
3244   }
3245   return true;
3246 }