1 /*
   2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "jvm.h"
  32 #include "logging/log.hpp"
  33 #include "logging/logStream.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "runtime/globals_extension.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/checkedCast.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_FLAG
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = 0;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = 0;
  59 
  60 static BufferBlob* stub_blob;
  61 static const int stub_size = 2000;
  62 
  63 extern "C" {
  64   typedef void (*get_cpu_info_stub_t)(void*);
  65   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  66 }
  67 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  68 static detect_virt_stub_t detect_virt_stub = nullptr;
  69 
  70 #ifdef _LP64
  71 
  72 bool VM_Version::supports_clflush() {
  73   // clflush should always be available on x86_64
  74   // if not we are in real trouble because we rely on it
  75   // to flush the code cache.
  76   // Unfortunately, Assembler::clflush is currently called as part
  77   // of generation of the code cache flush routine. This happens
  78   // under Universe::init before the processor features are set
  79   // up. Assembler::flush calls this routine to check that clflush
  80   // is allowed. So, we give the caller a free pass if Universe init
  81   // is still in progress.
  82   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  83   return true;
  84 }
  85 #endif
  86 
  87 #define CPUID_STANDARD_FN   0x0
  88 #define CPUID_STANDARD_FN_1 0x1
  89 #define CPUID_STANDARD_FN_4 0x4
  90 #define CPUID_STANDARD_FN_B 0xb
  91 
  92 #define CPUID_EXTENDED_FN   0x80000000
  93 #define CPUID_EXTENDED_FN_1 0x80000001
  94 #define CPUID_EXTENDED_FN_2 0x80000002
  95 #define CPUID_EXTENDED_FN_3 0x80000003
  96 #define CPUID_EXTENDED_FN_4 0x80000004
  97 #define CPUID_EXTENDED_FN_7 0x80000007
  98 #define CPUID_EXTENDED_FN_8 0x80000008
  99 
 100 class VM_Version_StubGenerator: public StubCodeGenerator {
 101  public:
 102 
 103   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 104 
 105   address generate_get_cpu_info() {
 106     // Flags to test CPU type.
 107     const uint32_t HS_EFL_AC = 0x40000;
 108     const uint32_t HS_EFL_ID = 0x200000;
 109     // Values for when we don't have a CPUID instruction.
 110     const int      CPU_FAMILY_SHIFT = 8;
 111     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 112     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 113     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 114 
 115     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 116     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 117     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 118 
 119     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 120 #   define __ _masm->
 121 
 122     address start = __ pc();
 123 
 124     //
 125     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 126     //
 127     // LP64: rcx and rdx are first and second argument registers on windows
 128 
 129     __ push(rbp);
 130 #ifdef _LP64
 131     __ mov(rbp, c_rarg0); // cpuid_info address
 132 #else
 133     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 134 #endif
 135     __ push(rbx);
 136     __ push(rsi);
 137     __ pushf();          // preserve rbx, and flags
 138     __ pop(rax);
 139     __ push(rax);
 140     __ mov(rcx, rax);
 141     //
 142     // if we are unable to change the AC flag, we have a 386
 143     //
 144     __ xorl(rax, HS_EFL_AC);
 145     __ push(rax);
 146     __ popf();
 147     __ pushf();
 148     __ pop(rax);
 149     __ cmpptr(rax, rcx);
 150     __ jccb(Assembler::notEqual, detect_486);
 151 
 152     __ movl(rax, CPU_FAMILY_386);
 153     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 154     __ jmp(done);
 155 
 156     //
 157     // If we are unable to change the ID flag, we have a 486 which does
 158     // not support the "cpuid" instruction.
 159     //
 160     __ bind(detect_486);
 161     __ mov(rax, rcx);
 162     __ xorl(rax, HS_EFL_ID);
 163     __ push(rax);
 164     __ popf();
 165     __ pushf();
 166     __ pop(rax);
 167     __ cmpptr(rcx, rax);
 168     __ jccb(Assembler::notEqual, detect_586);
 169 
 170     __ bind(cpu486);
 171     __ movl(rax, CPU_FAMILY_486);
 172     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 173     __ jmp(done);
 174 
 175     //
 176     // At this point, we have a chip which supports the "cpuid" instruction
 177     //
 178     __ bind(detect_586);
 179     __ xorl(rax, rax);
 180     __ cpuid();
 181     __ orl(rax, rax);
 182     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 183                                         // value of at least 1, we give up and
 184                                         // assume a 486
 185     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 186     __ movl(Address(rsi, 0), rax);
 187     __ movl(Address(rsi, 4), rbx);
 188     __ movl(Address(rsi, 8), rcx);
 189     __ movl(Address(rsi,12), rdx);
 190 
 191     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 192     __ jccb(Assembler::belowEqual, std_cpuid4);
 193 
 194     //
 195     // cpuid(0xB) Processor Topology
 196     //
 197     __ movl(rax, 0xb);
 198     __ xorl(rcx, rcx);   // Threads level
 199     __ cpuid();
 200 
 201     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 202     __ movl(Address(rsi, 0), rax);
 203     __ movl(Address(rsi, 4), rbx);
 204     __ movl(Address(rsi, 8), rcx);
 205     __ movl(Address(rsi,12), rdx);
 206 
 207     __ movl(rax, 0xb);
 208     __ movl(rcx, 1);     // Cores level
 209     __ cpuid();
 210     __ push(rax);
 211     __ andl(rax, 0x1f);  // Determine if valid topology level
 212     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 213     __ andl(rax, 0xffff);
 214     __ pop(rax);
 215     __ jccb(Assembler::equal, std_cpuid4);
 216 
 217     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 218     __ movl(Address(rsi, 0), rax);
 219     __ movl(Address(rsi, 4), rbx);
 220     __ movl(Address(rsi, 8), rcx);
 221     __ movl(Address(rsi,12), rdx);
 222 
 223     __ movl(rax, 0xb);
 224     __ movl(rcx, 2);     // Packages level
 225     __ cpuid();
 226     __ push(rax);
 227     __ andl(rax, 0x1f);  // Determine if valid topology level
 228     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 229     __ andl(rax, 0xffff);
 230     __ pop(rax);
 231     __ jccb(Assembler::equal, std_cpuid4);
 232 
 233     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 234     __ movl(Address(rsi, 0), rax);
 235     __ movl(Address(rsi, 4), rbx);
 236     __ movl(Address(rsi, 8), rcx);
 237     __ movl(Address(rsi,12), rdx);
 238 
 239     //
 240     // cpuid(0x4) Deterministic cache params
 241     //
 242     __ bind(std_cpuid4);
 243     __ movl(rax, 4);
 244     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 245     __ jccb(Assembler::greater, std_cpuid1);
 246 
 247     __ xorl(rcx, rcx);   // L1 cache
 248     __ cpuid();
 249     __ push(rax);
 250     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 251     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 252     __ pop(rax);
 253     __ jccb(Assembler::equal, std_cpuid1);
 254 
 255     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 256     __ movl(Address(rsi, 0), rax);
 257     __ movl(Address(rsi, 4), rbx);
 258     __ movl(Address(rsi, 8), rcx);
 259     __ movl(Address(rsi,12), rdx);
 260 
 261     //
 262     // Standard cpuid(0x1)
 263     //
 264     __ bind(std_cpuid1);
 265     __ movl(rax, 1);
 266     __ cpuid();
 267     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 268     __ movl(Address(rsi, 0), rax);
 269     __ movl(Address(rsi, 4), rbx);
 270     __ movl(Address(rsi, 8), rcx);
 271     __ movl(Address(rsi,12), rdx);
 272 
 273     //
 274     // Check if OS has enabled XGETBV instruction to access XCR0
 275     // (OSXSAVE feature flag) and CPU supports AVX
 276     //
 277     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 278     __ cmpl(rcx, 0x18000000);
 279     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 280 
 281     //
 282     // XCR0, XFEATURE_ENABLED_MASK register
 283     //
 284     __ xorl(rcx, rcx);   // zero for XCR0 register
 285     __ xgetbv();
 286     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 287     __ movl(Address(rsi, 0), rax);
 288     __ movl(Address(rsi, 4), rdx);
 289 
 290     //
 291     // cpuid(0x7) Structured Extended Features
 292     //
 293     __ bind(sef_cpuid);
 294     __ movl(rax, 7);
 295     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 296     __ jccb(Assembler::greater, ext_cpuid);
 297 
 298     __ xorl(rcx, rcx);
 299     __ cpuid();
 300     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 301     __ movl(Address(rsi, 0), rax);
 302     __ movl(Address(rsi, 4), rbx);
 303     __ movl(Address(rsi, 8), rcx);
 304     __ movl(Address(rsi, 12), rdx);
 305 
 306     //
 307     // Extended cpuid(0x80000000)
 308     //
 309     __ bind(ext_cpuid);
 310     __ movl(rax, 0x80000000);
 311     __ cpuid();
 312     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 313     __ jcc(Assembler::belowEqual, done);
 314     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 315     __ jcc(Assembler::belowEqual, ext_cpuid1);
 316     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 317     __ jccb(Assembler::belowEqual, ext_cpuid5);
 318     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 319     __ jccb(Assembler::belowEqual, ext_cpuid7);
 320     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 321     __ jccb(Assembler::belowEqual, ext_cpuid8);
 322     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 323     __ jccb(Assembler::below, ext_cpuid8);
 324     //
 325     // Extended cpuid(0x8000001E)
 326     //
 327     __ movl(rax, 0x8000001E);
 328     __ cpuid();
 329     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 330     __ movl(Address(rsi, 0), rax);
 331     __ movl(Address(rsi, 4), rbx);
 332     __ movl(Address(rsi, 8), rcx);
 333     __ movl(Address(rsi,12), rdx);
 334 
 335     //
 336     // Extended cpuid(0x80000008)
 337     //
 338     __ bind(ext_cpuid8);
 339     __ movl(rax, 0x80000008);
 340     __ cpuid();
 341     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 342     __ movl(Address(rsi, 0), rax);
 343     __ movl(Address(rsi, 4), rbx);
 344     __ movl(Address(rsi, 8), rcx);
 345     __ movl(Address(rsi,12), rdx);
 346 
 347     //
 348     // Extended cpuid(0x80000007)
 349     //
 350     __ bind(ext_cpuid7);
 351     __ movl(rax, 0x80000007);
 352     __ cpuid();
 353     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 354     __ movl(Address(rsi, 0), rax);
 355     __ movl(Address(rsi, 4), rbx);
 356     __ movl(Address(rsi, 8), rcx);
 357     __ movl(Address(rsi,12), rdx);
 358 
 359     //
 360     // Extended cpuid(0x80000005)
 361     //
 362     __ bind(ext_cpuid5);
 363     __ movl(rax, 0x80000005);
 364     __ cpuid();
 365     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 366     __ movl(Address(rsi, 0), rax);
 367     __ movl(Address(rsi, 4), rbx);
 368     __ movl(Address(rsi, 8), rcx);
 369     __ movl(Address(rsi,12), rdx);
 370 
 371     //
 372     // Extended cpuid(0x80000001)
 373     //
 374     __ bind(ext_cpuid1);
 375     __ movl(rax, 0x80000001);
 376     __ cpuid();
 377     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 378     __ movl(Address(rsi, 0), rax);
 379     __ movl(Address(rsi, 4), rbx);
 380     __ movl(Address(rsi, 8), rcx);
 381     __ movl(Address(rsi,12), rdx);
 382 
 383     //
 384     // Check if OS has enabled XGETBV instruction to access XCR0
 385     // (OSXSAVE feature flag) and CPU supports AVX
 386     //
 387     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 388     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 389     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 390     __ cmpl(rcx, 0x18000000);
 391     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 392 
 393     __ movl(rax, 0x6);
 394     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 395     __ cmpl(rax, 0x6);
 396     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 397 
 398     // we need to bridge farther than imm8, so we use this island as a thunk
 399     __ bind(done);
 400     __ jmp(wrapup);
 401 
 402     __ bind(start_simd_check);
 403     //
 404     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 405     // registers are not restored after a signal processing.
 406     // Generate SEGV here (reference through null)
 407     // and check upper YMM/ZMM bits after it.
 408     //
 409     int saved_useavx = UseAVX;
 410     int saved_usesse = UseSSE;
 411 
 412     // If UseAVX is uninitialized or is set by the user to include EVEX
 413     if (use_evex) {
 414       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 415       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 416       __ movl(rax, 0x10000);
 417       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 418       __ cmpl(rax, 0x10000);
 419       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 420       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 421       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 422       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 423       __ movl(rax, 0xE0);
 424       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 425       __ cmpl(rax, 0xE0);
 426       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 427 
 428       if (FLAG_IS_DEFAULT(UseAVX)) {
 429         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 430         __ movl(rax, Address(rsi, 0));
 431         __ cmpl(rax, 0x50654);              // If it is Skylake
 432         __ jcc(Assembler::equal, legacy_setup);
 433       }
 434       // EVEX setup: run in lowest evex mode
 435       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 436       UseAVX = 3;
 437       UseSSE = 2;
 438 #ifdef _WINDOWS
 439       // xmm5-xmm15 are not preserved by caller on windows
 440       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 441       __ subptr(rsp, 64);
 442       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 443 #ifdef _LP64
 444       __ subptr(rsp, 64);
 445       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 446       __ subptr(rsp, 64);
 447       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 448 #endif // _LP64
 449 #endif // _WINDOWS
 450 
 451       // load value into all 64 bytes of zmm7 register
 452       __ movl(rcx, VM_Version::ymm_test_value());
 453       __ movdl(xmm0, rcx);
 454       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 455       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 456 #ifdef _LP64
 457       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 458       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 459 #endif
 460       VM_Version::clean_cpuFeatures();
 461       __ jmp(save_restore_except);
 462     }
 463 
 464     __ bind(legacy_setup);
 465     // AVX setup
 466     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 467     UseAVX = 1;
 468     UseSSE = 2;
 469 #ifdef _WINDOWS
 470     __ subptr(rsp, 32);
 471     __ vmovdqu(Address(rsp, 0), xmm7);
 472 #ifdef _LP64
 473     __ subptr(rsp, 32);
 474     __ vmovdqu(Address(rsp, 0), xmm8);
 475     __ subptr(rsp, 32);
 476     __ vmovdqu(Address(rsp, 0), xmm15);
 477 #endif // _LP64
 478 #endif // _WINDOWS
 479 
 480     // load value into all 32 bytes of ymm7 register
 481     __ movl(rcx, VM_Version::ymm_test_value());
 482 
 483     __ movdl(xmm0, rcx);
 484     __ pshufd(xmm0, xmm0, 0x00);
 485     __ vinsertf128_high(xmm0, xmm0);
 486     __ vmovdqu(xmm7, xmm0);
 487 #ifdef _LP64
 488     __ vmovdqu(xmm8, xmm0);
 489     __ vmovdqu(xmm15, xmm0);
 490 #endif
 491     VM_Version::clean_cpuFeatures();
 492 
 493     __ bind(save_restore_except);
 494     __ xorl(rsi, rsi);
 495     VM_Version::set_cpuinfo_segv_addr(__ pc());
 496     // Generate SEGV
 497     __ movl(rax, Address(rsi, 0));
 498 
 499     VM_Version::set_cpuinfo_cont_addr(__ pc());
 500     // Returns here after signal. Save xmm0 to check it later.
 501 
 502     // If UseAVX is uninitialized or is set by the user to include EVEX
 503     if (use_evex) {
 504       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 505       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 506       __ movl(rax, 0x10000);
 507       __ andl(rax, Address(rsi, 4));
 508       __ cmpl(rax, 0x10000);
 509       __ jcc(Assembler::notEqual, legacy_save_restore);
 510       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 511       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 512       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 513       __ movl(rax, 0xE0);
 514       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 515       __ cmpl(rax, 0xE0);
 516       __ jcc(Assembler::notEqual, legacy_save_restore);
 517 
 518       if (FLAG_IS_DEFAULT(UseAVX)) {
 519         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 520         __ movl(rax, Address(rsi, 0));
 521         __ cmpl(rax, 0x50654);              // If it is Skylake
 522         __ jcc(Assembler::equal, legacy_save_restore);
 523       }
 524       // EVEX check: run in lowest evex mode
 525       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 526       UseAVX = 3;
 527       UseSSE = 2;
 528       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 529       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 530       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 531 #ifdef _LP64
 532       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 533       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 534 #endif
 535 
 536 #ifdef _WINDOWS
 537 #ifdef _LP64
 538       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 539       __ addptr(rsp, 64);
 540       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 541       __ addptr(rsp, 64);
 542 #endif // _LP64
 543       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 544       __ addptr(rsp, 64);
 545 #endif // _WINDOWS
 546       generate_vzeroupper(wrapup);
 547       VM_Version::clean_cpuFeatures();
 548       UseAVX = saved_useavx;
 549       UseSSE = saved_usesse;
 550       __ jmp(wrapup);
 551    }
 552 
 553     __ bind(legacy_save_restore);
 554     // AVX check
 555     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 556     UseAVX = 1;
 557     UseSSE = 2;
 558     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 559     __ vmovdqu(Address(rsi, 0), xmm0);
 560     __ vmovdqu(Address(rsi, 32), xmm7);
 561 #ifdef _LP64
 562     __ vmovdqu(Address(rsi, 64), xmm8);
 563     __ vmovdqu(Address(rsi, 96), xmm15);
 564 #endif
 565 
 566 #ifdef _WINDOWS
 567 #ifdef _LP64
 568     __ vmovdqu(xmm15, Address(rsp, 0));
 569     __ addptr(rsp, 32);
 570     __ vmovdqu(xmm8, Address(rsp, 0));
 571     __ addptr(rsp, 32);
 572 #endif // _LP64
 573     __ vmovdqu(xmm7, Address(rsp, 0));
 574     __ addptr(rsp, 32);
 575 #endif // _WINDOWS
 576     generate_vzeroupper(wrapup);
 577     VM_Version::clean_cpuFeatures();
 578     UseAVX = saved_useavx;
 579     UseSSE = saved_usesse;
 580 
 581     __ bind(wrapup);
 582     __ popf();
 583     __ pop(rsi);
 584     __ pop(rbx);
 585     __ pop(rbp);
 586     __ ret(0);
 587 
 588 #   undef __
 589 
 590     return start;
 591   };
 592   void generate_vzeroupper(Label& L_wrapup) {
 593 #   define __ _masm->
 594     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 595     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 596     __ jcc(Assembler::notEqual, L_wrapup);
 597     __ movl(rcx, 0x0FFF0FF0);
 598     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 599     __ andl(rcx, Address(rsi, 0));
 600     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 601     __ jcc(Assembler::equal, L_wrapup);
 602     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 603     __ jcc(Assembler::equal, L_wrapup);
 604     // vzeroupper() will use a pre-computed instruction sequence that we
 605     // can't compute until after we've determined CPU capabilities. Use
 606     // uncached variant here directly to be able to bootstrap correctly
 607     __ vzeroupper_uncached();
 608 #   undef __
 609   }
 610   address generate_detect_virt() {
 611     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 612 #   define __ _masm->
 613 
 614     address start = __ pc();
 615 
 616     // Evacuate callee-saved registers
 617     __ push(rbp);
 618     __ push(rbx);
 619     __ push(rsi); // for Windows
 620 
 621 #ifdef _LP64
 622     __ mov(rax, c_rarg0); // CPUID leaf
 623     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 624 #else
 625     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 626     __ movptr(rsi, Address(rsp, 20)); // register array address
 627 #endif
 628 
 629     __ cpuid();
 630 
 631     // Store result to register array
 632     __ movl(Address(rsi,  0), rax);
 633     __ movl(Address(rsi,  4), rbx);
 634     __ movl(Address(rsi,  8), rcx);
 635     __ movl(Address(rsi, 12), rdx);
 636 
 637     // Epilogue
 638     __ pop(rsi);
 639     __ pop(rbx);
 640     __ pop(rbp);
 641     __ ret(0);
 642 
 643 #   undef __
 644 
 645     return start;
 646   };
 647 
 648 
 649   address generate_getCPUIDBrandString(void) {
 650     // Flags to test CPU type.
 651     const uint32_t HS_EFL_AC           = 0x40000;
 652     const uint32_t HS_EFL_ID           = 0x200000;
 653     // Values for when we don't have a CPUID instruction.
 654     const int      CPU_FAMILY_SHIFT = 8;
 655     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 656     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 657 
 658     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 659 
 660     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 661 #   define __ _masm->
 662 
 663     address start = __ pc();
 664 
 665     //
 666     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 667     //
 668     // LP64: rcx and rdx are first and second argument registers on windows
 669 
 670     __ push(rbp);
 671 #ifdef _LP64
 672     __ mov(rbp, c_rarg0); // cpuid_info address
 673 #else
 674     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 675 #endif
 676     __ push(rbx);
 677     __ push(rsi);
 678     __ pushf();          // preserve rbx, and flags
 679     __ pop(rax);
 680     __ push(rax);
 681     __ mov(rcx, rax);
 682     //
 683     // if we are unable to change the AC flag, we have a 386
 684     //
 685     __ xorl(rax, HS_EFL_AC);
 686     __ push(rax);
 687     __ popf();
 688     __ pushf();
 689     __ pop(rax);
 690     __ cmpptr(rax, rcx);
 691     __ jccb(Assembler::notEqual, detect_486);
 692 
 693     __ movl(rax, CPU_FAMILY_386);
 694     __ jmp(done);
 695 
 696     //
 697     // If we are unable to change the ID flag, we have a 486 which does
 698     // not support the "cpuid" instruction.
 699     //
 700     __ bind(detect_486);
 701     __ mov(rax, rcx);
 702     __ xorl(rax, HS_EFL_ID);
 703     __ push(rax);
 704     __ popf();
 705     __ pushf();
 706     __ pop(rax);
 707     __ cmpptr(rcx, rax);
 708     __ jccb(Assembler::notEqual, detect_586);
 709 
 710     __ bind(cpu486);
 711     __ movl(rax, CPU_FAMILY_486);
 712     __ jmp(done);
 713 
 714     //
 715     // At this point, we have a chip which supports the "cpuid" instruction
 716     //
 717     __ bind(detect_586);
 718     __ xorl(rax, rax);
 719     __ cpuid();
 720     __ orl(rax, rax);
 721     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 722                                         // value of at least 1, we give up and
 723                                         // assume a 486
 724 
 725     //
 726     // Extended cpuid(0x80000000) for processor brand string detection
 727     //
 728     __ bind(ext_cpuid);
 729     __ movl(rax, CPUID_EXTENDED_FN);
 730     __ cpuid();
 731     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 732     __ jcc(Assembler::below, done);
 733 
 734     //
 735     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 736     //
 737     __ movl(rax, CPUID_EXTENDED_FN_2);
 738     __ cpuid();
 739     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 740     __ movl(Address(rsi, 0), rax);
 741     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 742     __ movl(Address(rsi, 0), rbx);
 743     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 744     __ movl(Address(rsi, 0), rcx);
 745     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 746     __ movl(Address(rsi,0), rdx);
 747 
 748     //
 749     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 750     //
 751     __ movl(rax, CPUID_EXTENDED_FN_3);
 752     __ cpuid();
 753     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 754     __ movl(Address(rsi, 0), rax);
 755     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 756     __ movl(Address(rsi, 0), rbx);
 757     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 758     __ movl(Address(rsi, 0), rcx);
 759     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 760     __ movl(Address(rsi,0), rdx);
 761 
 762     //
 763     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 764     //
 765     __ movl(rax, CPUID_EXTENDED_FN_4);
 766     __ cpuid();
 767     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 768     __ movl(Address(rsi, 0), rax);
 769     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 770     __ movl(Address(rsi, 0), rbx);
 771     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 772     __ movl(Address(rsi, 0), rcx);
 773     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 774     __ movl(Address(rsi,0), rdx);
 775 
 776     //
 777     // return
 778     //
 779     __ bind(done);
 780     __ popf();
 781     __ pop(rsi);
 782     __ pop(rbx);
 783     __ pop(rbp);
 784     __ ret(0);
 785 
 786 #   undef __
 787 
 788     return start;
 789   };
 790 };
 791 
 792 void VM_Version::get_processor_features() {
 793 
 794   _cpu = 4; // 486 by default
 795   _model = 0;
 796   _stepping = 0;
 797   _features = 0;
 798   _logical_processors_per_package = 1;
 799   // i486 internal cache is both I&D and has a 16-byte line size
 800   _L1_data_cache_line_size = 16;
 801 
 802   // Get raw processor info
 803 
 804   get_cpu_info_stub(&_cpuid_info);
 805 
 806   assert_is_initialized();
 807   _cpu = extended_cpu_family();
 808   _model = extended_cpu_model();
 809   _stepping = cpu_stepping();
 810 
 811   if (cpu_family() > 4) { // it supports CPUID
 812     _features = feature_flags();
 813     // Logical processors are only available on P4s and above,
 814     // and only if hyperthreading is available.
 815     _logical_processors_per_package = logical_processor_count();
 816     _L1_data_cache_line_size = L1_line_size();
 817   }
 818 
 819   // xchg and xadd instructions
 820   _supports_atomic_getset4 = true;
 821   _supports_atomic_getadd4 = true;
 822   LP64_ONLY(_supports_atomic_getset8 = true);
 823   LP64_ONLY(_supports_atomic_getadd8 = true);
 824 
 825 #ifdef _LP64
 826   // OS should support SSE for x64 and hardware should support at least SSE2.
 827   if (!VM_Version::supports_sse2()) {
 828     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 829   }
 830   // in 64 bit the use of SSE2 is the minimum
 831   if (UseSSE < 2) UseSSE = 2;
 832 #endif
 833 
 834 #ifdef AMD64
 835   // flush_icache_stub have to be generated first.
 836   // That is why Icache line size is hard coded in ICache class,
 837   // see icache_x86.hpp. It is also the reason why we can't use
 838   // clflush instruction in 32-bit VM since it could be running
 839   // on CPU which does not support it.
 840   //
 841   // The only thing we can do is to verify that flushed
 842   // ICache::line_size has correct value.
 843   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 844   // clflush_size is size in quadwords (8 bytes).
 845   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 846 #endif
 847 
 848 #ifdef _LP64
 849   // assigning this field effectively enables Unsafe.writebackMemory()
 850   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 851   // that is only implemented on x86_64 and only if the OS plays ball
 852   if (os::supports_map_sync()) {
 853     // publish data cache line flush size to generic field, otherwise
 854     // let if default to zero thereby disabling writeback
 855     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 856   }
 857 #endif
 858 
 859   // Check if processor has Intel Ecore
 860   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 861     (_model == 0x97 || _model == 0xAC || _model == 0xAF)) {
 862     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 863   }
 864 
 865   if (UseSSE < 4) {
 866     _features &= ~CPU_SSE4_1;
 867     _features &= ~CPU_SSE4_2;
 868   }
 869 
 870   if (UseSSE < 3) {
 871     _features &= ~CPU_SSE3;
 872     _features &= ~CPU_SSSE3;
 873     _features &= ~CPU_SSE4A;
 874   }
 875 
 876   if (UseSSE < 2)
 877     _features &= ~CPU_SSE2;
 878 
 879   if (UseSSE < 1)
 880     _features &= ~CPU_SSE;
 881 
 882   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 883   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 884     UseAVX = 0;
 885   }
 886 
 887   // UseSSE is set to the smaller of what hardware supports and what
 888   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 889   // older Pentiums which do not support it.
 890   int use_sse_limit = 0;
 891   if (UseSSE > 0) {
 892     if (UseSSE > 3 && supports_sse4_1()) {
 893       use_sse_limit = 4;
 894     } else if (UseSSE > 2 && supports_sse3()) {
 895       use_sse_limit = 3;
 896     } else if (UseSSE > 1 && supports_sse2()) {
 897       use_sse_limit = 2;
 898     } else if (UseSSE > 0 && supports_sse()) {
 899       use_sse_limit = 1;
 900     } else {
 901       use_sse_limit = 0;
 902     }
 903   }
 904   if (FLAG_IS_DEFAULT(UseSSE)) {
 905     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 906   } else if (UseSSE > use_sse_limit) {
 907     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 908     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 909   }
 910 
 911   // first try initial setting and detect what we can support
 912   int use_avx_limit = 0;
 913   if (UseAVX > 0) {
 914     if (UseSSE < 4) {
 915       // Don't use AVX if SSE is unavailable or has been disabled.
 916       use_avx_limit = 0;
 917     } else if (UseAVX > 2 && supports_evex()) {
 918       use_avx_limit = 3;
 919     } else if (UseAVX > 1 && supports_avx2()) {
 920       use_avx_limit = 2;
 921     } else if (UseAVX > 0 && supports_avx()) {
 922       use_avx_limit = 1;
 923     } else {
 924       use_avx_limit = 0;
 925     }
 926   }
 927   if (FLAG_IS_DEFAULT(UseAVX)) {
 928     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 929     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 930       FLAG_SET_DEFAULT(UseAVX, 2);
 931     } else {
 932       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 933     }
 934   }
 935   if (UseAVX > use_avx_limit) {
 936     if (UseSSE < 4) {
 937       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 938     } else {
 939       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 940     }
 941     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 942   }
 943 
 944   if (UseAVX < 3) {
 945     _features &= ~CPU_AVX512F;
 946     _features &= ~CPU_AVX512DQ;
 947     _features &= ~CPU_AVX512CD;
 948     _features &= ~CPU_AVX512BW;
 949     _features &= ~CPU_AVX512VL;
 950     _features &= ~CPU_AVX512_VPOPCNTDQ;
 951     _features &= ~CPU_AVX512_VPCLMULQDQ;
 952     _features &= ~CPU_AVX512_VAES;
 953     _features &= ~CPU_AVX512_VNNI;
 954     _features &= ~CPU_AVX512_VBMI;
 955     _features &= ~CPU_AVX512_VBMI2;
 956     _features &= ~CPU_AVX512_BITALG;
 957     _features &= ~CPU_AVX512_IFMA;
 958   }
 959 
 960   if (UseAVX < 2)
 961     _features &= ~CPU_AVX2;
 962 
 963   if (UseAVX < 1) {
 964     _features &= ~CPU_AVX;
 965     _features &= ~CPU_VZEROUPPER;
 966     _features &= ~CPU_F16C;
 967   }
 968 
 969   if (logical_processors_per_package() == 1) {
 970     // HT processor could be installed on a system which doesn't support HT.
 971     _features &= ~CPU_HT;
 972   }
 973 
 974   if (is_intel()) { // Intel cpus specific settings
 975     if (is_knights_family()) {
 976       _features &= ~CPU_VZEROUPPER;
 977       _features &= ~CPU_AVX512BW;
 978       _features &= ~CPU_AVX512VL;
 979       _features &= ~CPU_AVX512DQ;
 980       _features &= ~CPU_AVX512_VNNI;
 981       _features &= ~CPU_AVX512_VAES;
 982       _features &= ~CPU_AVX512_VPOPCNTDQ;
 983       _features &= ~CPU_AVX512_VPCLMULQDQ;
 984       _features &= ~CPU_AVX512_VBMI;
 985       _features &= ~CPU_AVX512_VBMI2;
 986       _features &= ~CPU_CLWB;
 987       _features &= ~CPU_FLUSHOPT;
 988       _features &= ~CPU_GFNI;
 989       _features &= ~CPU_AVX512_BITALG;
 990       _features &= ~CPU_AVX512_IFMA;
 991     }
 992   }
 993 
 994   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
 995     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
 996   } else {
 997     _has_intel_jcc_erratum = IntelJccErratumMitigation;
 998   }
 999 
1000   char buf[1024];
1001   int res = jio_snprintf(
1002               buf, sizeof(buf),
1003               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1004               cores_per_cpu(), threads_per_core(),
1005               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1006   assert(res > 0, "not enough temporary space allocated");
1007   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1008 
1009   _features_string = os::strdup(buf);
1010 
1011   // Use AES instructions if available.
1012   if (supports_aes()) {
1013     if (FLAG_IS_DEFAULT(UseAES)) {
1014       FLAG_SET_DEFAULT(UseAES, true);
1015     }
1016     if (!UseAES) {
1017       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1018         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1019       }
1020       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1021     } else {
1022       if (UseSSE > 2) {
1023         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1024           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1025         }
1026       } else {
1027         // The AES intrinsic stubs require AES instruction support (of course)
1028         // but also require sse3 mode or higher for instructions it use.
1029         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1030           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1031         }
1032         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1033       }
1034 
1035       // --AES-CTR begins--
1036       if (!UseAESIntrinsics) {
1037         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1038           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1039           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1040         }
1041       } else {
1042         if (supports_sse4_1()) {
1043           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1044             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1045           }
1046         } else {
1047            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1048            // but also require sse4.1 mode or higher for instructions it use.
1049           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1050              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1051            }
1052            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1053         }
1054       }
1055       // --AES-CTR ends--
1056     }
1057   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1058     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1059       warning("AES instructions are not available on this CPU");
1060       FLAG_SET_DEFAULT(UseAES, false);
1061     }
1062     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1063       warning("AES intrinsics are not available on this CPU");
1064       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1065     }
1066     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1067       warning("AES-CTR intrinsics are not available on this CPU");
1068       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1069     }
1070   }
1071 
1072   // Use CLMUL instructions if available.
1073   if (supports_clmul()) {
1074     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1075       UseCLMUL = true;
1076     }
1077   } else if (UseCLMUL) {
1078     if (!FLAG_IS_DEFAULT(UseCLMUL))
1079       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1080     FLAG_SET_DEFAULT(UseCLMUL, false);
1081   }
1082 
1083   if (UseCLMUL && (UseSSE > 2)) {
1084     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1085       UseCRC32Intrinsics = true;
1086     }
1087   } else if (UseCRC32Intrinsics) {
1088     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1089       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1090     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1091   }
1092 
1093 #ifdef _LP64
1094   if (supports_avx2()) {
1095     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1096       UseAdler32Intrinsics = true;
1097     }
1098   } else if (UseAdler32Intrinsics) {
1099     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1100       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1101     }
1102     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1103   }
1104 #else
1105   if (UseAdler32Intrinsics) {
1106     warning("Adler32Intrinsics not available on this CPU.");
1107     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1108   }
1109 #endif
1110 
1111   if (supports_sse4_2() && supports_clmul()) {
1112     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1113       UseCRC32CIntrinsics = true;
1114     }
1115   } else if (UseCRC32CIntrinsics) {
1116     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1117       warning("CRC32C intrinsics are not available on this CPU");
1118     }
1119     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1120   }
1121 
1122   // GHASH/GCM intrinsics
1123   if (UseCLMUL && (UseSSE > 2)) {
1124     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1125       UseGHASHIntrinsics = true;
1126     }
1127   } else if (UseGHASHIntrinsics) {
1128     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1129       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1130     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1131   }
1132 
1133   // ChaCha20 Intrinsics
1134   // As long as the system supports AVX as a baseline we can do a
1135   // SIMD-enabled block function.  StubGenerator makes the determination
1136   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1137   // version.
1138   if (UseAVX >= 1) {
1139       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1140           UseChaCha20Intrinsics = true;
1141       }
1142   } else if (UseChaCha20Intrinsics) {
1143       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1144           warning("ChaCha20 intrinsic requires AVX instructions");
1145       }
1146       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1147   }
1148 
1149   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1150   if (UseAVX >= 2) {
1151     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1152       UseBASE64Intrinsics = true;
1153     }
1154   } else if (UseBASE64Intrinsics) {
1155      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1156       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1157     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1158   }
1159 
1160   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1161     if (FLAG_IS_DEFAULT(UseFMA)) {
1162       UseFMA = true;
1163     }
1164   } else if (UseFMA) {
1165     warning("FMA instructions are not available on this CPU");
1166     FLAG_SET_DEFAULT(UseFMA, false);
1167   }
1168 
1169   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1170     UseMD5Intrinsics = true;
1171   }
1172 
1173   if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
1174     if (FLAG_IS_DEFAULT(UseSHA)) {
1175       UseSHA = true;
1176     }
1177   } else if (UseSHA) {
1178     warning("SHA instructions are not available on this CPU");
1179     FLAG_SET_DEFAULT(UseSHA, false);
1180   }
1181 
1182   if (supports_sha() && supports_sse4_1() && UseSHA) {
1183     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1184       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1185     }
1186   } else if (UseSHA1Intrinsics) {
1187     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1188     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1189   }
1190 
1191   if (supports_sse4_1() && UseSHA) {
1192     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1193       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1194     }
1195   } else if (UseSHA256Intrinsics) {
1196     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1197     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1198   }
1199 
1200 #ifdef _LP64
1201   // These are only supported on 64-bit
1202   if (UseSHA && supports_avx2() && supports_bmi2()) {
1203     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1204       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1205     }
1206   } else
1207 #endif
1208   if (UseSHA512Intrinsics) {
1209     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1210     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1211   }
1212 
1213   if (UseSHA3Intrinsics) {
1214     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1215     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1216   }
1217 
1218   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1219     FLAG_SET_DEFAULT(UseSHA, false);
1220   }
1221 
1222   if (!supports_rtm() && UseRTMLocking) {
1223     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1224   }
1225 
1226 #if INCLUDE_RTM_OPT
1227   if (UseRTMLocking) {
1228     if (!CompilerConfig::is_c2_enabled()) {
1229       // Only C2 does RTM locking optimization.
1230       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1231     }
1232     if (is_intel_family_core()) {
1233       if ((_model == CPU_MODEL_HASWELL_E3) ||
1234           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1235           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1236         // currently a collision between SKL and HSW_E3
1237         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1238           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1239                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1240         } else {
1241           warning("UseRTMLocking is only available as experimental option on this platform.");
1242         }
1243       }
1244     }
1245     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1246       // RTM locking should be used only for applications with
1247       // high lock contention. For now we do not use it by default.
1248       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1249     }
1250   } else { // !UseRTMLocking
1251     if (UseRTMForStackLocks) {
1252       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1253         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1254       }
1255       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1256     }
1257     if (UseRTMDeopt) {
1258       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1259     }
1260     if (PrintPreciseRTMLockingStatistics) {
1261       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1262     }
1263   }
1264 #else
1265   if (UseRTMLocking) {
1266     // Only C2 does RTM locking optimization.
1267     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1268   }
1269 #endif
1270 
1271 #ifdef COMPILER2
1272   if (UseFPUForSpilling) {
1273     if (UseSSE < 2) {
1274       // Only supported with SSE2+
1275       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1276     }
1277   }
1278 #endif
1279 
1280 #if COMPILER2_OR_JVMCI
1281   int max_vector_size = 0;
1282   if (UseSSE < 2) {
1283     // Vectors (in XMM) are only supported with SSE2+
1284     // SSE is always 2 on x64.
1285     max_vector_size = 0;
1286   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1287     // 16 byte vectors (in XMM) are supported with SSE2+
1288     max_vector_size = 16;
1289   } else if (UseAVX == 1 || UseAVX == 2) {
1290     // 32 bytes vectors (in YMM) are only supported with AVX+
1291     max_vector_size = 32;
1292   } else if (UseAVX > 2) {
1293     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1294     max_vector_size = 64;
1295   }
1296 
1297 #ifdef _LP64
1298   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1299 #else
1300   int min_vector_size = 0;
1301 #endif
1302 
1303   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1304     if (MaxVectorSize < min_vector_size) {
1305       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1306       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1307     }
1308     if (MaxVectorSize > max_vector_size) {
1309       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1310       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1311     }
1312     if (!is_power_of_2(MaxVectorSize)) {
1313       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1314       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1315     }
1316   } else {
1317     // If default, use highest supported configuration
1318     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1319   }
1320 
1321 #if defined(COMPILER2) && defined(ASSERT)
1322   if (MaxVectorSize > 0) {
1323     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1324       tty->print_cr("State of YMM registers after signal handle:");
1325       int nreg = 2 LP64_ONLY(+2);
1326       const char* ymm_name[4] = {"0", "7", "8", "15"};
1327       for (int i = 0; i < nreg; i++) {
1328         tty->print("YMM%s:", ymm_name[i]);
1329         for (int j = 7; j >=0; j--) {
1330           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1331         }
1332         tty->cr();
1333       }
1334     }
1335   }
1336 #endif // COMPILER2 && ASSERT
1337 
1338 #ifdef _LP64
1339   if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
1340     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1341       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1342     }
1343   } else
1344 #endif
1345   if (UsePoly1305Intrinsics) {
1346     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1347     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1348   }
1349 
1350 #ifdef _LP64
1351   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1352     UseMultiplyToLenIntrinsic = true;
1353   }
1354   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1355     UseSquareToLenIntrinsic = true;
1356   }
1357   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1358     UseMulAddIntrinsic = true;
1359   }
1360   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1361     UseMontgomeryMultiplyIntrinsic = true;
1362   }
1363   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1364     UseMontgomerySquareIntrinsic = true;
1365   }
1366 #else
1367   if (UseMultiplyToLenIntrinsic) {
1368     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1369       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1370     }
1371     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1372   }
1373   if (UseMontgomeryMultiplyIntrinsic) {
1374     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1375       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1376     }
1377     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1378   }
1379   if (UseMontgomerySquareIntrinsic) {
1380     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1381       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1382     }
1383     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1384   }
1385   if (UseSquareToLenIntrinsic) {
1386     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1387       warning("squareToLen intrinsic is not available in 32-bit VM");
1388     }
1389     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1390   }
1391   if (UseMulAddIntrinsic) {
1392     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1393       warning("mulAdd intrinsic is not available in 32-bit VM");
1394     }
1395     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1396   }
1397 #endif // _LP64
1398 #endif // COMPILER2_OR_JVMCI
1399 
1400   // On new cpus instructions which update whole XMM register should be used
1401   // to prevent partial register stall due to dependencies on high half.
1402   //
1403   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1404   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1405   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1406   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1407 
1408 
1409   if (is_zx()) { // ZX cpus specific settings
1410     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1411       UseStoreImmI16 = false; // don't use it on ZX cpus
1412     }
1413     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1414       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1415         // Use it on all ZX cpus
1416         UseAddressNop = true;
1417       }
1418     }
1419     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1420       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1421     }
1422     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1423       if (supports_sse3()) {
1424         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1425       } else {
1426         UseXmmRegToRegMoveAll = false;
1427       }
1428     }
1429     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1430 #ifdef COMPILER2
1431       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1432         // For new ZX cpus do the next optimization:
1433         // don't align the beginning of a loop if there are enough instructions
1434         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1435         // in current fetch line (OptoLoopAlignment) or the padding
1436         // is big (> MaxLoopPad).
1437         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1438         // generated NOP instructions. 11 is the largest size of one
1439         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1440         MaxLoopPad = 11;
1441       }
1442 #endif // COMPILER2
1443       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1444         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1445       }
1446       if (supports_sse4_2()) { // new ZX cpus
1447         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1448           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1449         }
1450       }
1451       if (supports_sse4_2()) {
1452         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1453           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1454         }
1455       } else {
1456         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1457           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1458         }
1459         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1460       }
1461     }
1462 
1463     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1464       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1465     }
1466   }
1467 
1468   if (is_amd_family()) { // AMD cpus specific settings
1469     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1470       // Use it on new AMD cpus starting from Opteron.
1471       UseAddressNop = true;
1472     }
1473     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1474       // Use it on new AMD cpus starting from Opteron.
1475       UseNewLongLShift = true;
1476     }
1477     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1478       if (supports_sse4a()) {
1479         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1480       } else {
1481         UseXmmLoadAndClearUpper = false;
1482       }
1483     }
1484     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1485       if (supports_sse4a()) {
1486         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1487       } else {
1488         UseXmmRegToRegMoveAll = false;
1489       }
1490     }
1491     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1492       if (supports_sse4a()) {
1493         UseXmmI2F = true;
1494       } else {
1495         UseXmmI2F = false;
1496       }
1497     }
1498     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1499       if (supports_sse4a()) {
1500         UseXmmI2D = true;
1501       } else {
1502         UseXmmI2D = false;
1503       }
1504     }
1505     if (supports_sse4_2()) {
1506       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1507         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1508       }
1509     } else {
1510       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1511         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1512       }
1513       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1514     }
1515 
1516     // some defaults for AMD family 15h
1517     if (cpu_family() == 0x15) {
1518       // On family 15h processors default is no sw prefetch
1519       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1520         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1521       }
1522       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1523       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1524         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1525       }
1526       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1527       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1528         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1529       }
1530       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1531         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1532       }
1533     }
1534 
1535 #ifdef COMPILER2
1536     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1537       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1538       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1539     }
1540 #endif // COMPILER2
1541 
1542     // Some defaults for AMD family >= 17h && Hygon family 18h
1543     if (cpu_family() >= 0x17) {
1544       // On family >=17h processors use XMM and UnalignedLoadStores
1545       // for Array Copy
1546       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1547         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1548       }
1549       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1550         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1551       }
1552 #ifdef COMPILER2
1553       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1554         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1555       }
1556 #endif
1557     }
1558   }
1559 
1560   if (is_intel()) { // Intel cpus specific settings
1561     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1562       UseStoreImmI16 = false; // don't use it on Intel cpus
1563     }
1564     if (cpu_family() == 6 || cpu_family() == 15) {
1565       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1566         // Use it on all Intel cpus starting from PentiumPro
1567         UseAddressNop = true;
1568       }
1569     }
1570     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1571       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1572     }
1573     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1574       if (supports_sse3()) {
1575         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1576       } else {
1577         UseXmmRegToRegMoveAll = false;
1578       }
1579     }
1580     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1581 #ifdef COMPILER2
1582       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1583         // For new Intel cpus do the next optimization:
1584         // don't align the beginning of a loop if there are enough instructions
1585         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1586         // in current fetch line (OptoLoopAlignment) or the padding
1587         // is big (> MaxLoopPad).
1588         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1589         // generated NOP instructions. 11 is the largest size of one
1590         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1591         MaxLoopPad = 11;
1592       }
1593 #endif // COMPILER2
1594 
1595       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1596         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1597       }
1598       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1599         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1600           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1601         }
1602       }
1603       if (supports_sse4_2()) {
1604         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1605           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1606         }
1607       } else {
1608         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1609           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1610         }
1611         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1612       }
1613     }
1614     if (is_atom_family() || is_knights_family()) {
1615 #ifdef COMPILER2
1616       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1617         OptoScheduling = true;
1618       }
1619 #endif
1620       if (supports_sse4_2()) { // Silvermont
1621         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1622           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1623         }
1624       }
1625       if (FLAG_IS_DEFAULT(UseIncDec)) {
1626         FLAG_SET_DEFAULT(UseIncDec, false);
1627       }
1628     }
1629     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1630       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1631     }
1632 #ifdef COMPILER2
1633     if (UseAVX > 2) {
1634       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1635           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1636            ArrayOperationPartialInlineSize != 0 &&
1637            ArrayOperationPartialInlineSize != 16 &&
1638            ArrayOperationPartialInlineSize != 32 &&
1639            ArrayOperationPartialInlineSize != 64)) {
1640         int inline_size = 0;
1641         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1642           inline_size = 64;
1643         } else if (MaxVectorSize >= 32) {
1644           inline_size = 32;
1645         } else if (MaxVectorSize >= 16) {
1646           inline_size = 16;
1647         }
1648         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1649           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1650         }
1651         ArrayOperationPartialInlineSize = inline_size;
1652       }
1653 
1654       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1655         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1656         if (ArrayOperationPartialInlineSize) {
1657           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1658         } else {
1659           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1660         }
1661       }
1662     }
1663 #endif
1664   }
1665 
1666 #ifdef COMPILER2
1667   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1668     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1669       OptimizeFill = false;
1670     }
1671   }
1672 #endif
1673 
1674 #ifdef _LP64
1675   if (UseSSE42Intrinsics) {
1676     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1677       UseVectorizedMismatchIntrinsic = true;
1678     }
1679   } else if (UseVectorizedMismatchIntrinsic) {
1680     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1681       warning("vectorizedMismatch intrinsics are not available on this CPU");
1682     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1683   }
1684   if (UseAVX >= 2) {
1685     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1686   } else if (UseVectorizedHashCodeIntrinsic) {
1687     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1688       warning("vectorizedHashCode intrinsics are not available on this CPU");
1689     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1690   }
1691 #else
1692   if (UseVectorizedMismatchIntrinsic) {
1693     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1694       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1695     }
1696     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1697   }
1698   if (UseVectorizedHashCodeIntrinsic) {
1699     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1700       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1701     }
1702     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1703   }
1704 #endif // _LP64
1705 
1706   // Use count leading zeros count instruction if available.
1707   if (supports_lzcnt()) {
1708     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1709       UseCountLeadingZerosInstruction = true;
1710     }
1711    } else if (UseCountLeadingZerosInstruction) {
1712     warning("lzcnt instruction is not available on this CPU");
1713     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1714   }
1715 
1716   // Use count trailing zeros instruction if available
1717   if (supports_bmi1()) {
1718     // tzcnt does not require VEX prefix
1719     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1720       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1721         // Don't use tzcnt if BMI1 is switched off on command line.
1722         UseCountTrailingZerosInstruction = false;
1723       } else {
1724         UseCountTrailingZerosInstruction = true;
1725       }
1726     }
1727   } else if (UseCountTrailingZerosInstruction) {
1728     warning("tzcnt instruction is not available on this CPU");
1729     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1730   }
1731 
1732   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1733   // VEX prefix is generated only when AVX > 0.
1734   if (supports_bmi1() && supports_avx()) {
1735     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1736       UseBMI1Instructions = true;
1737     }
1738   } else if (UseBMI1Instructions) {
1739     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1740     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1741   }
1742 
1743   if (supports_bmi2() && supports_avx()) {
1744     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1745       UseBMI2Instructions = true;
1746     }
1747   } else if (UseBMI2Instructions) {
1748     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1749     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1750   }
1751 
1752   // Use population count instruction if available.
1753   if (supports_popcnt()) {
1754     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1755       UsePopCountInstruction = true;
1756     }
1757   } else if (UsePopCountInstruction) {
1758     warning("POPCNT instruction is not available on this CPU");
1759     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1760   }
1761 
1762   // Use fast-string operations if available.
1763   if (supports_erms()) {
1764     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1765       UseFastStosb = true;
1766     }
1767   } else if (UseFastStosb) {
1768     warning("fast-string operations are not available on this CPU");
1769     FLAG_SET_DEFAULT(UseFastStosb, false);
1770   }
1771 
1772   // For AMD Processors use XMM/YMM MOVDQU instructions
1773   // for Object Initialization as default
1774   if (is_amd() && cpu_family() >= 0x19) {
1775     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1776       UseFastStosb = false;
1777     }
1778   }
1779 
1780 #ifdef COMPILER2
1781   if (is_intel() && MaxVectorSize > 16) {
1782     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1783       UseFastStosb = false;
1784     }
1785   }
1786 #endif
1787 
1788   // Use XMM/YMM MOVDQU instruction for Object Initialization
1789   if (UseSSE >= 2 && UseUnalignedLoadStores) {
1790     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1791       UseXMMForObjInit = true;
1792     }
1793   } else if (UseXMMForObjInit) {
1794     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1795     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1796   }
1797 
1798 #ifdef COMPILER2
1799   if (FLAG_IS_DEFAULT(AlignVector)) {
1800     // Modern processors allow misaligned memory operations for vectors.
1801     AlignVector = !UseUnalignedLoadStores;
1802   }
1803 #endif // COMPILER2
1804 
1805   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1806     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1807       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1808     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1809       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1810     }
1811   }
1812 
1813   // Allocation prefetch settings
1814   int cache_line_size = checked_cast<int>(prefetch_data_size());
1815   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1816       (cache_line_size > AllocatePrefetchStepSize)) {
1817     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1818   }
1819 
1820   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1821     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1822     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1823       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1824     }
1825     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1826   }
1827 
1828   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1829     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1830     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1831   }
1832 
1833   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1834     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1835         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1836       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1837     }
1838 #ifdef COMPILER2
1839     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1840       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1841     }
1842 #endif
1843   }
1844 
1845   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1846 #ifdef COMPILER2
1847     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1848       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1849     }
1850 #endif
1851   }
1852 
1853 #ifdef _LP64
1854   // Prefetch settings
1855 
1856   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1857   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1858   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1859   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1860 
1861   // gc copy/scan is disabled if prefetchw isn't supported, because
1862   // Prefetch::write emits an inlined prefetchw on Linux.
1863   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1864   // The used prefetcht0 instruction works for both amd64 and em64t.
1865 
1866   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1867     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1868   }
1869   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1870     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1871   }
1872 #endif
1873 
1874   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1875      (cache_line_size > ContendedPaddingWidth))
1876      ContendedPaddingWidth = cache_line_size;
1877 
1878   // This machine allows unaligned memory accesses
1879   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1880     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1881   }
1882 
1883 #ifndef PRODUCT
1884   if (log_is_enabled(Info, os, cpu)) {
1885     LogStream ls(Log(os, cpu)::info());
1886     outputStream* log = &ls;
1887     log->print_cr("Logical CPUs per core: %u",
1888                   logical_processors_per_package());
1889     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1890     log->print("UseSSE=%d", UseSSE);
1891     if (UseAVX > 0) {
1892       log->print("  UseAVX=%d", UseAVX);
1893     }
1894     if (UseAES) {
1895       log->print("  UseAES=1");
1896     }
1897 #ifdef COMPILER2
1898     if (MaxVectorSize > 0) {
1899       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1900     }
1901 #endif
1902     log->cr();
1903     log->print("Allocation");
1904     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1905       log->print_cr(": no prefetching");
1906     } else {
1907       log->print(" prefetching: ");
1908       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1909         log->print("PREFETCHW");
1910       } else if (UseSSE >= 1) {
1911         if (AllocatePrefetchInstr == 0) {
1912           log->print("PREFETCHNTA");
1913         } else if (AllocatePrefetchInstr == 1) {
1914           log->print("PREFETCHT0");
1915         } else if (AllocatePrefetchInstr == 2) {
1916           log->print("PREFETCHT2");
1917         } else if (AllocatePrefetchInstr == 3) {
1918           log->print("PREFETCHW");
1919         }
1920       }
1921       if (AllocatePrefetchLines > 1) {
1922         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1923       } else {
1924         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1925       }
1926     }
1927 
1928     if (PrefetchCopyIntervalInBytes > 0) {
1929       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1930     }
1931     if (PrefetchScanIntervalInBytes > 0) {
1932       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1933     }
1934     if (ContendedPaddingWidth > 0) {
1935       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1936     }
1937   }
1938 #endif // !PRODUCT
1939   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1940       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1941   }
1942   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1943       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1944   }
1945 }
1946 
1947 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1948   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1949   if (vrt == XenHVM) {
1950     st->print_cr("Xen hardware-assisted virtualization detected");
1951   } else if (vrt == KVM) {
1952     st->print_cr("KVM virtualization detected");
1953   } else if (vrt == VMWare) {
1954     st->print_cr("VMWare virtualization detected");
1955     VirtualizationSupport::print_virtualization_info(st);
1956   } else if (vrt == HyperV) {
1957     st->print_cr("Hyper-V virtualization detected");
1958   } else if (vrt == HyperVRole) {
1959     st->print_cr("Hyper-V role detected");
1960   }
1961 }
1962 
1963 bool VM_Version::compute_has_intel_jcc_erratum() {
1964   if (!is_intel_family_core()) {
1965     // Only Intel CPUs are affected.
1966     return false;
1967   }
1968   // The following table of affected CPUs is based on the following document released by Intel:
1969   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1970   switch (_model) {
1971   case 0x8E:
1972     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1973     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1974     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1975     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1976     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1977     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1978     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1979     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1980     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1981     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1982   case 0x4E:
1983     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1984     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1985     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1986     return _stepping == 0x3;
1987   case 0x55:
1988     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1989     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1990     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1991     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1992     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1993     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1994     return _stepping == 0x4 || _stepping == 0x7;
1995   case 0x5E:
1996     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1997     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1998     return _stepping == 0x3;
1999   case 0x9E:
2000     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2001     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2002     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2003     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2004     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2005     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2006     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2007     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2008     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2009     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2010     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2011     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2012     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2013     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2014     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2015   case 0xA5:
2016     // Not in Intel documentation.
2017     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2018     return true;
2019   case 0xA6:
2020     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2021     return _stepping == 0x0;
2022   case 0xAE:
2023     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2024     return _stepping == 0xA;
2025   default:
2026     // If we are running on another intel machine not recognized in the table, we are okay.
2027     return false;
2028   }
2029 }
2030 
2031 // On Xen, the cpuid instruction returns
2032 //  eax / registers[0]: Version of Xen
2033 //  ebx / registers[1]: chars 'XenV'
2034 //  ecx / registers[2]: chars 'MMXe'
2035 //  edx / registers[3]: chars 'nVMM'
2036 //
2037 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2038 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2039 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2040 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2041 //
2042 // more information :
2043 // https://kb.vmware.com/s/article/1009458
2044 //
2045 void VM_Version::check_virtualizations() {
2046   uint32_t registers[4] = {0};
2047   char signature[13] = {0};
2048 
2049   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2050   // from 0x40000000 until 0x40010000.
2051   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2052   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2053     detect_virt_stub(leaf, registers);
2054     memcpy(signature, &registers[1], 12);
2055 
2056     if (strncmp("VMwareVMware", signature, 12) == 0) {
2057       Abstract_VM_Version::_detected_virtualization = VMWare;
2058       // check for extended metrics from guestlib
2059       VirtualizationSupport::initialize();
2060     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2061       Abstract_VM_Version::_detected_virtualization = HyperV;
2062 #ifdef _WINDOWS
2063       // CPUID leaf 0x40000007 is available to the root partition only.
2064       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2065       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2066       detect_virt_stub(0x40000007, registers);
2067       if ((registers[0] != 0x0) ||
2068           (registers[1] != 0x0) ||
2069           (registers[2] != 0x0) ||
2070           (registers[3] != 0x0)) {
2071         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2072       }
2073 #endif
2074     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2075       Abstract_VM_Version::_detected_virtualization = KVM;
2076     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2077       Abstract_VM_Version::_detected_virtualization = XenHVM;
2078     }
2079   }
2080 }
2081 
2082 #ifdef COMPILER2
2083 // Determine if it's running on Cascade Lake using default options.
2084 bool VM_Version::is_default_intel_cascade_lake() {
2085   return FLAG_IS_DEFAULT(UseAVX) &&
2086          FLAG_IS_DEFAULT(MaxVectorSize) &&
2087          UseAVX > 2 &&
2088          is_intel_cascade_lake();
2089 }
2090 #endif
2091 
2092 bool VM_Version::is_intel_cascade_lake() {
2093   return is_intel_skylake() && _stepping >= 5;
2094 }
2095 
2096 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2097 // for implementing the array copy and clear operations.
2098 // The Intel platforms that supports the serialize instruction
2099 // has improved implementation of 64-byte load/stores and so the default
2100 // threshold is set to 0 for these platforms.
2101 int VM_Version::avx3_threshold() {
2102   return (is_intel_family_core() &&
2103           supports_serialize() &&
2104           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2105 }
2106 
2107 static bool _vm_version_initialized = false;
2108 
2109 void VM_Version::initialize() {
2110   ResourceMark rm;
2111   // Making this stub must be FIRST use of assembler
2112   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2113   if (stub_blob == nullptr) {
2114     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2115   }
2116   CodeBuffer c(stub_blob);
2117   VM_Version_StubGenerator g(&c);
2118 
2119   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2120                                      g.generate_get_cpu_info());
2121   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2122                                      g.generate_detect_virt());
2123 
2124   get_processor_features();
2125 
2126   LP64_ONLY(Assembler::precompute_instructions();)
2127 
2128   if (VM_Version::supports_hv()) { // Supports hypervisor
2129     check_virtualizations();
2130   }
2131   _vm_version_initialized = true;
2132 }
2133 
2134 typedef enum {
2135    CPU_FAMILY_8086_8088  = 0,
2136    CPU_FAMILY_INTEL_286  = 2,
2137    CPU_FAMILY_INTEL_386  = 3,
2138    CPU_FAMILY_INTEL_486  = 4,
2139    CPU_FAMILY_PENTIUM    = 5,
2140    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2141    CPU_FAMILY_PENTIUM_4  = 0xF
2142 } FamilyFlag;
2143 
2144 typedef enum {
2145   RDTSCP_FLAG  = 0x08000000, // bit 27
2146   INTEL64_FLAG = 0x20000000  // bit 29
2147 } _featureExtendedEdxFlag;
2148 
2149 typedef enum {
2150    FPU_FLAG     = 0x00000001,
2151    VME_FLAG     = 0x00000002,
2152    DE_FLAG      = 0x00000004,
2153    PSE_FLAG     = 0x00000008,
2154    TSC_FLAG     = 0x00000010,
2155    MSR_FLAG     = 0x00000020,
2156    PAE_FLAG     = 0x00000040,
2157    MCE_FLAG     = 0x00000080,
2158    CX8_FLAG     = 0x00000100,
2159    APIC_FLAG    = 0x00000200,
2160    SEP_FLAG     = 0x00000800,
2161    MTRR_FLAG    = 0x00001000,
2162    PGE_FLAG     = 0x00002000,
2163    MCA_FLAG     = 0x00004000,
2164    CMOV_FLAG    = 0x00008000,
2165    PAT_FLAG     = 0x00010000,
2166    PSE36_FLAG   = 0x00020000,
2167    PSNUM_FLAG   = 0x00040000,
2168    CLFLUSH_FLAG = 0x00080000,
2169    DTS_FLAG     = 0x00200000,
2170    ACPI_FLAG    = 0x00400000,
2171    MMX_FLAG     = 0x00800000,
2172    FXSR_FLAG    = 0x01000000,
2173    SSE_FLAG     = 0x02000000,
2174    SSE2_FLAG    = 0x04000000,
2175    SS_FLAG      = 0x08000000,
2176    HTT_FLAG     = 0x10000000,
2177    TM_FLAG      = 0x20000000
2178 } FeatureEdxFlag;
2179 
2180 static BufferBlob* cpuid_brand_string_stub_blob;
2181 static const int   cpuid_brand_string_stub_size = 550;
2182 
2183 extern "C" {
2184   typedef void (*getCPUIDBrandString_stub_t)(void*);
2185 }
2186 
2187 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2188 
2189 // VM_Version statics
2190 enum {
2191   ExtendedFamilyIdLength_INTEL = 16,
2192   ExtendedFamilyIdLength_AMD   = 24
2193 };
2194 
2195 const size_t VENDOR_LENGTH = 13;
2196 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2197 static char* _cpu_brand_string = nullptr;
2198 static int64_t _max_qualified_cpu_frequency = 0;
2199 
2200 static int _no_of_threads = 0;
2201 static int _no_of_cores = 0;
2202 
2203 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2204   "8086/8088",
2205   "",
2206   "286",
2207   "386",
2208   "486",
2209   "Pentium",
2210   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2211   "",
2212   "",
2213   "",
2214   "",
2215   "",
2216   "",
2217   "",
2218   "",
2219   "Pentium 4"
2220 };
2221 
2222 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2223   "",
2224   "",
2225   "",
2226   "",
2227   "5x86",
2228   "K5/K6",
2229   "Athlon/AthlonXP",
2230   "",
2231   "",
2232   "",
2233   "",
2234   "",
2235   "",
2236   "",
2237   "",
2238   "Opteron/Athlon64",
2239   "Opteron QC/Phenom",  // Barcelona et.al.
2240   "",
2241   "",
2242   "",
2243   "",
2244   "",
2245   "",
2246   "Zen"
2247 };
2248 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2249 // September 2013, Vol 3C Table 35-1
2250 const char* const _model_id_pentium_pro[] = {
2251   "",
2252   "Pentium Pro",
2253   "",
2254   "Pentium II model 3",
2255   "",
2256   "Pentium II model 5/Xeon/Celeron",
2257   "Celeron",
2258   "Pentium III/Pentium III Xeon",
2259   "Pentium III/Pentium III Xeon",
2260   "Pentium M model 9",    // Yonah
2261   "Pentium III, model A",
2262   "Pentium III, model B",
2263   "",
2264   "Pentium M model D",    // Dothan
2265   "",
2266   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2267   "",
2268   "",
2269   "",
2270   "",
2271   "",
2272   "",
2273   "Celeron",              // 0x16 Celeron 65nm
2274   "Core 2",               // 0x17 Penryn / Harpertown
2275   "",
2276   "",
2277   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2278   "Atom",                 // 0x1B Z5xx series Silverthorn
2279   "",
2280   "Core 2",               // 0x1D Dunnington (6-core)
2281   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2282   "",
2283   "",
2284   "",
2285   "",
2286   "",
2287   "",
2288   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2289   "",
2290   "",
2291   "",                     // 0x28
2292   "",
2293   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2294   "",
2295   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2296   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2297   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2298   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2299   "",
2300   "",
2301   "",
2302   "",
2303   "",
2304   "",
2305   "",
2306   "",
2307   "",
2308   "",
2309   "Ivy Bridge",           // 0x3a
2310   "",
2311   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2312   "",                     // 0x3d "Next Generation Intel Core Processor"
2313   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2314   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2315   "",
2316   "",
2317   "",
2318   "",
2319   "",
2320   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2321   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2322   nullptr
2323 };
2324 
2325 /* Brand ID is for back compatibility
2326  * Newer CPUs uses the extended brand string */
2327 const char* const _brand_id[] = {
2328   "",
2329   "Celeron processor",
2330   "Pentium III processor",
2331   "Intel Pentium III Xeon processor",
2332   "",
2333   "",
2334   "",
2335   "",
2336   "Intel Pentium 4 processor",
2337   nullptr
2338 };
2339 
2340 
2341 const char* const _feature_edx_id[] = {
2342   "On-Chip FPU",
2343   "Virtual Mode Extensions",
2344   "Debugging Extensions",
2345   "Page Size Extensions",
2346   "Time Stamp Counter",
2347   "Model Specific Registers",
2348   "Physical Address Extension",
2349   "Machine Check Exceptions",
2350   "CMPXCHG8B Instruction",
2351   "On-Chip APIC",
2352   "",
2353   "Fast System Call",
2354   "Memory Type Range Registers",
2355   "Page Global Enable",
2356   "Machine Check Architecture",
2357   "Conditional Mov Instruction",
2358   "Page Attribute Table",
2359   "36-bit Page Size Extension",
2360   "Processor Serial Number",
2361   "CLFLUSH Instruction",
2362   "",
2363   "Debug Trace Store feature",
2364   "ACPI registers in MSR space",
2365   "Intel Architecture MMX Technology",
2366   "Fast Float Point Save and Restore",
2367   "Streaming SIMD extensions",
2368   "Streaming SIMD extensions 2",
2369   "Self-Snoop",
2370   "Hyper Threading",
2371   "Thermal Monitor",
2372   "",
2373   "Pending Break Enable"
2374 };
2375 
2376 const char* const _feature_extended_edx_id[] = {
2377   "",
2378   "",
2379   "",
2380   "",
2381   "",
2382   "",
2383   "",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "SYSCALL/SYSRET",
2389   "",
2390   "",
2391   "",
2392   "",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "Execute Disable Bit",
2398   "",
2399   "",
2400   "",
2401   "",
2402   "",
2403   "",
2404   "RDTSCP",
2405   "",
2406   "Intel 64 Architecture",
2407   "",
2408   ""
2409 };
2410 
2411 const char* const _feature_ecx_id[] = {
2412   "Streaming SIMD Extensions 3",
2413   "PCLMULQDQ",
2414   "64-bit DS Area",
2415   "MONITOR/MWAIT instructions",
2416   "CPL Qualified Debug Store",
2417   "Virtual Machine Extensions",
2418   "Safer Mode Extensions",
2419   "Enhanced Intel SpeedStep technology",
2420   "Thermal Monitor 2",
2421   "Supplemental Streaming SIMD Extensions 3",
2422   "L1 Context ID",
2423   "",
2424   "Fused Multiply-Add",
2425   "CMPXCHG16B",
2426   "xTPR Update Control",
2427   "Perfmon and Debug Capability",
2428   "",
2429   "Process-context identifiers",
2430   "Direct Cache Access",
2431   "Streaming SIMD extensions 4.1",
2432   "Streaming SIMD extensions 4.2",
2433   "x2APIC",
2434   "MOVBE",
2435   "Popcount instruction",
2436   "TSC-Deadline",
2437   "AESNI",
2438   "XSAVE",
2439   "OSXSAVE",
2440   "AVX",
2441   "F16C",
2442   "RDRAND",
2443   ""
2444 };
2445 
2446 const char* const _feature_extended_ecx_id[] = {
2447   "LAHF/SAHF instruction support",
2448   "Core multi-processor legacy mode",
2449   "",
2450   "",
2451   "",
2452   "Advanced Bit Manipulations: LZCNT",
2453   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2454   "Misaligned SSE mode",
2455   "",
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "",
2476   "",
2477   "",
2478   ""
2479 };
2480 
2481 void VM_Version::initialize_tsc(void) {
2482   ResourceMark rm;
2483 
2484   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2485   if (cpuid_brand_string_stub_blob == nullptr) {
2486     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2487   }
2488   CodeBuffer c(cpuid_brand_string_stub_blob);
2489   VM_Version_StubGenerator g(&c);
2490   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2491                                    g.generate_getCPUIDBrandString());
2492 }
2493 
2494 const char* VM_Version::cpu_model_description(void) {
2495   uint32_t cpu_family = extended_cpu_family();
2496   uint32_t cpu_model = extended_cpu_model();
2497   const char* model = nullptr;
2498 
2499   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2500     for (uint32_t i = 0; i <= cpu_model; i++) {
2501       model = _model_id_pentium_pro[i];
2502       if (model == nullptr) {
2503         break;
2504       }
2505     }
2506   }
2507   return model;
2508 }
2509 
2510 const char* VM_Version::cpu_brand_string(void) {
2511   if (_cpu_brand_string == nullptr) {
2512     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2513     if (nullptr == _cpu_brand_string) {
2514       return nullptr;
2515     }
2516     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2517     if (ret_val != OS_OK) {
2518       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2519       _cpu_brand_string = nullptr;
2520     }
2521   }
2522   return _cpu_brand_string;
2523 }
2524 
2525 const char* VM_Version::cpu_brand(void) {
2526   const char*  brand  = nullptr;
2527 
2528   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2529     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2530     brand = _brand_id[0];
2531     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2532       brand = _brand_id[i];
2533     }
2534   }
2535   return brand;
2536 }
2537 
2538 bool VM_Version::cpu_is_em64t(void) {
2539   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2540 }
2541 
2542 bool VM_Version::is_netburst(void) {
2543   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2544 }
2545 
2546 bool VM_Version::supports_tscinv_ext(void) {
2547   if (!supports_tscinv_bit()) {
2548     return false;
2549   }
2550 
2551   if (is_intel()) {
2552     return true;
2553   }
2554 
2555   if (is_amd()) {
2556     return !is_amd_Barcelona();
2557   }
2558 
2559   if (is_hygon()) {
2560     return true;
2561   }
2562 
2563   return false;
2564 }
2565 
2566 void VM_Version::resolve_cpu_information_details(void) {
2567 
2568   // in future we want to base this information on proper cpu
2569   // and cache topology enumeration such as:
2570   // Intel 64 Architecture Processor Topology Enumeration
2571   // which supports system cpu and cache topology enumeration
2572   // either using 2xAPICIDs or initial APICIDs
2573 
2574   // currently only rough cpu information estimates
2575   // which will not necessarily reflect the exact configuration of the system
2576 
2577   // this is the number of logical hardware threads
2578   // visible to the operating system
2579   _no_of_threads = os::processor_count();
2580 
2581   // find out number of threads per cpu package
2582   int threads_per_package = threads_per_core() * cores_per_cpu();
2583 
2584   // use amount of threads visible to the process in order to guess number of sockets
2585   _no_of_sockets = _no_of_threads / threads_per_package;
2586 
2587   // process might only see a subset of the total number of threads
2588   // from a single processor package. Virtualization/resource management for example.
2589   // If so then just write a hard 1 as num of pkgs.
2590   if (0 == _no_of_sockets) {
2591     _no_of_sockets = 1;
2592   }
2593 
2594   // estimate the number of cores
2595   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2596 }
2597 
2598 
2599 const char* VM_Version::cpu_family_description(void) {
2600   int cpu_family_id = extended_cpu_family();
2601   if (is_amd()) {
2602     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2603       return _family_id_amd[cpu_family_id];
2604     }
2605   }
2606   if (is_intel()) {
2607     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2608       return cpu_model_description();
2609     }
2610     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2611       return _family_id_intel[cpu_family_id];
2612     }
2613   }
2614   if (is_hygon()) {
2615     return "Dhyana";
2616   }
2617   return "Unknown x86";
2618 }
2619 
2620 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2621   assert(buf != nullptr, "buffer is null!");
2622   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2623 
2624   const char* cpu_type = nullptr;
2625   const char* x64 = nullptr;
2626 
2627   if (is_intel()) {
2628     cpu_type = "Intel";
2629     x64 = cpu_is_em64t() ? " Intel64" : "";
2630   } else if (is_amd()) {
2631     cpu_type = "AMD";
2632     x64 = cpu_is_em64t() ? " AMD64" : "";
2633   } else if (is_hygon()) {
2634     cpu_type = "Hygon";
2635     x64 = cpu_is_em64t() ? " AMD64" : "";
2636   } else {
2637     cpu_type = "Unknown x86";
2638     x64 = cpu_is_em64t() ? " x86_64" : "";
2639   }
2640 
2641   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2642     cpu_type,
2643     cpu_family_description(),
2644     supports_ht() ? " (HT)" : "",
2645     supports_sse3() ? " SSE3" : "",
2646     supports_ssse3() ? " SSSE3" : "",
2647     supports_sse4_1() ? " SSE4.1" : "",
2648     supports_sse4_2() ? " SSE4.2" : "",
2649     supports_sse4a() ? " SSE4A" : "",
2650     is_netburst() ? " Netburst" : "",
2651     is_intel_family_core() ? " Core" : "",
2652     x64);
2653 
2654   return OS_OK;
2655 }
2656 
2657 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2658   assert(buf != nullptr, "buffer is null!");
2659   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2660   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2661 
2662   // invoke newly generated asm code to fetch CPU Brand String
2663   getCPUIDBrandString_stub(&_cpuid_info);
2664 
2665   // fetch results into buffer
2666   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2667   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2668   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2669   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2670   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2671   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2672   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2673   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2674   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2675   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2676   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2677   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2678 
2679   return OS_OK;
2680 }
2681 
2682 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2683   guarantee(buf != nullptr, "buffer is null!");
2684   guarantee(buf_len > 0, "buffer len not enough!");
2685 
2686   unsigned int flag = 0;
2687   unsigned int fi = 0;
2688   size_t       written = 0;
2689   const char*  prefix = "";
2690 
2691 #define WRITE_TO_BUF(string)                                                          \
2692   {                                                                                   \
2693     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2694     if (res < 0) {                                                                    \
2695       return buf_len - 1;                                                             \
2696     }                                                                                 \
2697     written += res;                                                                   \
2698     if (prefix[0] == '\0') {                                                          \
2699       prefix = ", ";                                                                  \
2700     }                                                                                 \
2701   }
2702 
2703   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2704     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2705       continue; /* no hyperthreading */
2706     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2707       continue; /* no fast system call */
2708     }
2709     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2710       WRITE_TO_BUF(_feature_edx_id[fi]);
2711     }
2712   }
2713 
2714   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2715     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2716       WRITE_TO_BUF(_feature_ecx_id[fi]);
2717     }
2718   }
2719 
2720   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2721     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2722       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2723     }
2724   }
2725 
2726   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2727     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2728       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2729     }
2730   }
2731 
2732   if (supports_tscinv_bit()) {
2733       WRITE_TO_BUF("Invariant TSC");
2734   }
2735 
2736   return written;
2737 }
2738 
2739 /**
2740  * Write a detailed description of the cpu to a given buffer, including
2741  * feature set.
2742  */
2743 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2744   assert(buf != nullptr, "buffer is null!");
2745   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2746 
2747   static const char* unknown = "<unknown>";
2748   char               vendor_id[VENDOR_LENGTH];
2749   const char*        family = nullptr;
2750   const char*        model = nullptr;
2751   const char*        brand = nullptr;
2752   int                outputLen = 0;
2753 
2754   family = cpu_family_description();
2755   if (family == nullptr) {
2756     family = unknown;
2757   }
2758 
2759   model = cpu_model_description();
2760   if (model == nullptr) {
2761     model = unknown;
2762   }
2763 
2764   brand = cpu_brand_string();
2765 
2766   if (brand == nullptr) {
2767     brand = cpu_brand();
2768     if (brand == nullptr) {
2769       brand = unknown;
2770     }
2771   }
2772 
2773   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2774   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2775   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2776   vendor_id[VENDOR_LENGTH-1] = '\0';
2777 
2778   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2779     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2780     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2781     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2782     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2783     "Supports: ",
2784     brand,
2785     vendor_id,
2786     family,
2787     extended_cpu_family(),
2788     model,
2789     extended_cpu_model(),
2790     cpu_stepping(),
2791     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2792     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2793     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2794     _cpuid_info.std_cpuid1_eax.value,
2795     _cpuid_info.std_cpuid1_ebx.value,
2796     _cpuid_info.std_cpuid1_ecx.value,
2797     _cpuid_info.std_cpuid1_edx.value,
2798     _cpuid_info.ext_cpuid1_eax,
2799     _cpuid_info.ext_cpuid1_ebx,
2800     _cpuid_info.ext_cpuid1_ecx,
2801     _cpuid_info.ext_cpuid1_edx);
2802 
2803   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2804     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2805     return OS_ERR;
2806   }
2807 
2808   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2809 
2810   return OS_OK;
2811 }
2812 
2813 
2814 // Fill in Abstract_VM_Version statics
2815 void VM_Version::initialize_cpu_information() {
2816   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2817   assert(!_initialized, "shouldn't be initialized yet");
2818   resolve_cpu_information_details();
2819 
2820   // initialize cpu_name and cpu_desc
2821   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2822   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2823   _initialized = true;
2824 }
2825 
2826 /**
2827  *  For information about extracting the frequency from the cpu brand string, please see:
2828  *
2829  *    Intel Processor Identification and the CPUID Instruction
2830  *    Application Note 485
2831  *    May 2012
2832  *
2833  * The return value is the frequency in Hz.
2834  */
2835 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2836   const char* const brand_string = cpu_brand_string();
2837   if (brand_string == nullptr) {
2838     return 0;
2839   }
2840   const int64_t MEGA = 1000000;
2841   int64_t multiplier = 0;
2842   int64_t frequency = 0;
2843   uint8_t idx = 0;
2844   // The brand string buffer is at most 48 bytes.
2845   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2846   for (; idx < 48-2; ++idx) {
2847     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2848     // Search brand string for "yHz" where y is M, G, or T.
2849     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2850       if (brand_string[idx] == 'M') {
2851         multiplier = MEGA;
2852       } else if (brand_string[idx] == 'G') {
2853         multiplier = MEGA * 1000;
2854       } else if (brand_string[idx] == 'T') {
2855         multiplier = MEGA * MEGA;
2856       }
2857       break;
2858     }
2859   }
2860   if (multiplier > 0) {
2861     // Compute frequency (in Hz) from brand string.
2862     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2863       frequency =  (brand_string[idx-4] - '0') * multiplier;
2864       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2865       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2866     } else { // format is "xxxx"
2867       frequency =  (brand_string[idx-4] - '0') * 1000;
2868       frequency += (brand_string[idx-3] - '0') * 100;
2869       frequency += (brand_string[idx-2] - '0') * 10;
2870       frequency += (brand_string[idx-1] - '0');
2871       frequency *= multiplier;
2872     }
2873   }
2874   return frequency;
2875 }
2876 
2877 
2878 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2879   if (_max_qualified_cpu_frequency == 0) {
2880     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2881   }
2882   return _max_qualified_cpu_frequency;
2883 }
2884 
2885 uint64_t VM_Version::feature_flags() {
2886   uint64_t result = 0;
2887   if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
2888     result |= CPU_CX8;
2889   if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
2890     result |= CPU_CMOV;
2891   if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
2892     result |= CPU_FLUSH;
2893 #ifdef _LP64
2894   // clflush should always be available on x86_64
2895   // if not we are in real trouble because we rely on it
2896   // to flush the code cache.
2897   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2898 #endif
2899   if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2900       _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
2901     result |= CPU_FXSR;
2902   // HT flag is set for multi-core processors also.
2903   if (threads_per_core() > 1)
2904     result |= CPU_HT;
2905   if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2906       _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
2907     result |= CPU_MMX;
2908   if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
2909     result |= CPU_SSE;
2910   if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
2911     result |= CPU_SSE2;
2912   if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
2913     result |= CPU_SSE3;
2914   if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
2915     result |= CPU_SSSE3;
2916   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
2917     result |= CPU_SSE4_1;
2918   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
2919     result |= CPU_SSE4_2;
2920   if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
2921     result |= CPU_POPCNT;
2922   if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
2923       _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
2924       _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
2925       _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
2926     result |= CPU_AVX;
2927     result |= CPU_VZEROUPPER;
2928     if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
2929       result |= CPU_F16C;
2930     if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
2931       result |= CPU_AVX2;
2932     if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
2933         _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
2934         _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
2935         _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
2936       result |= CPU_AVX512F;
2937       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
2938         result |= CPU_AVX512CD;
2939       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
2940         result |= CPU_AVX512DQ;
2941       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0)
2942         result |= CPU_AVX512_IFMA;
2943       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
2944         result |= CPU_AVX512PF;
2945       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
2946         result |= CPU_AVX512ER;
2947       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
2948         result |= CPU_AVX512BW;
2949       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
2950         result |= CPU_AVX512VL;
2951       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2952         result |= CPU_AVX512_VPOPCNTDQ;
2953       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2954         result |= CPU_AVX512_VPCLMULQDQ;
2955       if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
2956         result |= CPU_AVX512_VAES;
2957       if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
2958         result |= CPU_GFNI;
2959       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
2960         result |= CPU_AVX512_VNNI;
2961       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2962         result |= CPU_AVX512_BITALG;
2963       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2964         result |= CPU_AVX512_VBMI;
2965       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2966         result |= CPU_AVX512_VBMI2;
2967     }
2968   }
2969   if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
2970     result |= CPU_HV;
2971   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
2972     result |= CPU_BMI1;
2973   if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
2974     result |= CPU_TSC;
2975   if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
2976     result |= CPU_TSCINV_BIT;
2977   if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
2978     result |= CPU_AES;
2979   if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
2980     result |= CPU_ERMS;
2981   if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2982     result |= CPU_FSRM;
2983   if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
2984     result |= CPU_CLMUL;
2985   if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
2986     result |= CPU_RTM;
2987   if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
2988      result |= CPU_ADX;
2989   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
2990     result |= CPU_BMI2;
2991   if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
2992     result |= CPU_SHA;
2993   if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
2994     result |= CPU_FMA;
2995   if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
2996     result |= CPU_FLUSHOPT;
2997   if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
2998     result |= CPU_RDTSCP;
2999   if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
3000     result |= CPU_RDPID;
3001 
3002   // AMD|Hygon features.
3003   if (is_amd_family()) {
3004     if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
3005         (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
3006       result |= CPU_3DNOW_PREFETCH;
3007     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
3008       result |= CPU_LZCNT;
3009     if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
3010       result |= CPU_SSE4A;
3011   }
3012 
3013   // Intel features.
3014   if (is_intel()) {
3015     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3016       result |= CPU_LZCNT;
3017     }
3018     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3019       result |= CPU_3DNOW_PREFETCH;
3020     }
3021     if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
3022       result |= CPU_CLWB;
3023     }
3024     if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
3025       result |= CPU_SERIALIZE;
3026   }
3027 
3028   // ZX features.
3029   if (is_zx()) {
3030     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3031       result |= CPU_LZCNT;
3032     }
3033     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3034       result |= CPU_3DNOW_PREFETCH;
3035     }
3036   }
3037 
3038   // Protection key features.
3039   if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) {
3040     result |= CPU_PKU;
3041   }
3042   if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) {
3043     result |= CPU_OSPKE;
3044   }
3045 
3046   // Control flow enforcement (CET) features.
3047   if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) {
3048     result |= CPU_CET_SS;
3049   }
3050   if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) {
3051     result |= CPU_CET_IBT;
3052   }
3053 
3054   // Composite features.
3055   if (supports_tscinv_bit() &&
3056       ((is_amd_family() && !is_amd_Barcelona()) ||
3057        is_intel_tsc_synched_at_init())) {
3058     result |= CPU_TSCINV;
3059   }
3060 
3061   return result;
3062 }
3063 
3064 bool VM_Version::os_supports_avx_vectors() {
3065   bool retVal = false;
3066   int nreg = 2 LP64_ONLY(+2);
3067   if (supports_evex()) {
3068     // Verify that OS save/restore all bits of EVEX registers
3069     // during signal processing.
3070     retVal = true;
3071     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3072       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3073         retVal = false;
3074         break;
3075       }
3076     }
3077   } else if (supports_avx()) {
3078     // Verify that OS save/restore all bits of AVX registers
3079     // during signal processing.
3080     retVal = true;
3081     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3082       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3083         retVal = false;
3084         break;
3085       }
3086     }
3087     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3088     if (retVal == false) {
3089       // Verify that OS save/restore all bits of EVEX registers
3090       // during signal processing.
3091       retVal = true;
3092       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3093         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3094           retVal = false;
3095           break;
3096         }
3097       }
3098     }
3099   }
3100   return retVal;
3101 }
3102 
3103 uint VM_Version::cores_per_cpu() {
3104   uint result = 1;
3105   if (is_intel()) {
3106     bool supports_topology = supports_processor_topology();
3107     if (supports_topology) {
3108       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3109                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3110     }
3111     if (!supports_topology || result == 0) {
3112       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3113     }
3114   } else if (is_amd_family()) {
3115     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3116   } else if (is_zx()) {
3117     bool supports_topology = supports_processor_topology();
3118     if (supports_topology) {
3119       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3120                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3121     }
3122     if (!supports_topology || result == 0) {
3123       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3124     }
3125   }
3126   return result;
3127 }
3128 
3129 uint VM_Version::threads_per_core() {
3130   uint result = 1;
3131   if (is_intel() && supports_processor_topology()) {
3132     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3133   } else if (is_zx() && supports_processor_topology()) {
3134     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3135   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3136     if (cpu_family() >= 0x17) {
3137       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3138     } else {
3139       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3140                  cores_per_cpu();
3141     }
3142   }
3143   return (result == 0 ? 1 : result);
3144 }
3145 
3146 uint VM_Version::L1_line_size() {
3147   uint result = 0;
3148   if (is_intel()) {
3149     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3150   } else if (is_amd_family()) {
3151     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3152   } else if (is_zx()) {
3153     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3154   }
3155   if (result < 32) // not defined ?
3156     result = 32;   // 32 bytes by default on x86 and other x64
3157   return result;
3158 }
3159 
3160 bool VM_Version::is_intel_tsc_synched_at_init() {
3161   if (is_intel_family_core()) {
3162     uint32_t ext_model = extended_cpu_model();
3163     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3164         ext_model == CPU_MODEL_WESTMERE_EP    ||
3165         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3166         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3167       // <= 2-socket invariant tsc support. EX versions are usually used
3168       // in > 2-socket systems and likely don't synchronize tscs at
3169       // initialization.
3170       // Code that uses tsc values must be prepared for them to arbitrarily
3171       // jump forward or backward.
3172       return true;
3173     }
3174   }
3175   return false;
3176 }
3177 
3178 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3179   // Hardware prefetching (distance/size in bytes):
3180   // Pentium 3 -  64 /  32
3181   // Pentium 4 - 256 / 128
3182   // Athlon    -  64 /  32 ????
3183   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3184   // Core      - 128 /  64
3185   //
3186   // Software prefetching (distance in bytes / instruction with best score):
3187   // Pentium 3 - 128 / prefetchnta
3188   // Pentium 4 - 512 / prefetchnta
3189   // Athlon    - 128 / prefetchnta
3190   // Opteron   - 256 / prefetchnta
3191   // Core      - 256 / prefetchnta
3192   // It will be used only when AllocatePrefetchStyle > 0
3193 
3194   if (is_amd_family()) { // AMD | Hygon
3195     if (supports_sse2()) {
3196       return 256; // Opteron
3197     } else {
3198       return 128; // Athlon
3199     }
3200   } else { // Intel
3201     if (supports_sse3() && cpu_family() == 6) {
3202       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3203         return 192;
3204       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3205 #ifdef _LP64
3206         return 384;
3207 #else
3208         return 320;
3209 #endif
3210       }
3211     }
3212     if (supports_sse2()) {
3213       if (cpu_family() == 6) {
3214         return 256; // Pentium M, Core, Core2
3215       } else {
3216         return 512; // Pentium 4
3217       }
3218     } else {
3219       return 128; // Pentium 3 (and all other old CPUs)
3220     }
3221   }
3222 }
3223 
3224 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3225   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3226   switch (id) {
3227   case vmIntrinsics::_floatToFloat16:
3228   case vmIntrinsics::_float16ToFloat:
3229     if (!supports_float16()) {
3230       return false;
3231     }
3232     break;
3233   default:
3234     break;
3235   }
3236   return true;
3237 }