1 /*
   2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "jvm.h"
  32 #include "logging/log.hpp"
  33 #include "logging/logStream.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "runtime/globals_extension.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/checkedCast.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_FLAG
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = 0;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = 0;
  59 
  60 static BufferBlob* stub_blob;
  61 static const int stub_size = 2000;
  62 
  63 extern "C" {
  64   typedef void (*get_cpu_info_stub_t)(void*);
  65   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  66 }
  67 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  68 static detect_virt_stub_t detect_virt_stub = nullptr;
  69 
  70 #ifdef _LP64
  71 
  72 bool VM_Version::supports_clflush() {
  73   // clflush should always be available on x86_64
  74   // if not we are in real trouble because we rely on it
  75   // to flush the code cache.
  76   // Unfortunately, Assembler::clflush is currently called as part
  77   // of generation of the code cache flush routine. This happens
  78   // under Universe::init before the processor features are set
  79   // up. Assembler::flush calls this routine to check that clflush
  80   // is allowed. So, we give the caller a free pass if Universe init
  81   // is still in progress.
  82   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  83   return true;
  84 }
  85 #endif
  86 
  87 #define CPUID_STANDARD_FN   0x0
  88 #define CPUID_STANDARD_FN_1 0x1
  89 #define CPUID_STANDARD_FN_4 0x4
  90 #define CPUID_STANDARD_FN_B 0xb
  91 
  92 #define CPUID_EXTENDED_FN   0x80000000
  93 #define CPUID_EXTENDED_FN_1 0x80000001
  94 #define CPUID_EXTENDED_FN_2 0x80000002
  95 #define CPUID_EXTENDED_FN_3 0x80000003
  96 #define CPUID_EXTENDED_FN_4 0x80000004
  97 #define CPUID_EXTENDED_FN_7 0x80000007
  98 #define CPUID_EXTENDED_FN_8 0x80000008
  99 
 100 class VM_Version_StubGenerator: public StubCodeGenerator {
 101  public:
 102 
 103   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 104 
 105   address generate_get_cpu_info() {
 106     // Flags to test CPU type.
 107     const uint32_t HS_EFL_AC = 0x40000;
 108     const uint32_t HS_EFL_ID = 0x200000;
 109     // Values for when we don't have a CPUID instruction.
 110     const int      CPU_FAMILY_SHIFT = 8;
 111     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 112     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 113     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 114 
 115     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 116     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 117     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 118 
 119     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 120 #   define __ _masm->
 121 
 122     address start = __ pc();
 123 
 124     //
 125     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 126     //
 127     // LP64: rcx and rdx are first and second argument registers on windows
 128 
 129     __ push(rbp);
 130 #ifdef _LP64
 131     __ mov(rbp, c_rarg0); // cpuid_info address
 132 #else
 133     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 134 #endif
 135     __ push(rbx);
 136     __ push(rsi);
 137     __ pushf();          // preserve rbx, and flags
 138     __ pop(rax);
 139     __ push(rax);
 140     __ mov(rcx, rax);
 141     //
 142     // if we are unable to change the AC flag, we have a 386
 143     //
 144     __ xorl(rax, HS_EFL_AC);
 145     __ push(rax);
 146     __ popf();
 147     __ pushf();
 148     __ pop(rax);
 149     __ cmpptr(rax, rcx);
 150     __ jccb(Assembler::notEqual, detect_486);
 151 
 152     __ movl(rax, CPU_FAMILY_386);
 153     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 154     __ jmp(done);
 155 
 156     //
 157     // If we are unable to change the ID flag, we have a 486 which does
 158     // not support the "cpuid" instruction.
 159     //
 160     __ bind(detect_486);
 161     __ mov(rax, rcx);
 162     __ xorl(rax, HS_EFL_ID);
 163     __ push(rax);
 164     __ popf();
 165     __ pushf();
 166     __ pop(rax);
 167     __ cmpptr(rcx, rax);
 168     __ jccb(Assembler::notEqual, detect_586);
 169 
 170     __ bind(cpu486);
 171     __ movl(rax, CPU_FAMILY_486);
 172     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 173     __ jmp(done);
 174 
 175     //
 176     // At this point, we have a chip which supports the "cpuid" instruction
 177     //
 178     __ bind(detect_586);
 179     __ xorl(rax, rax);
 180     __ cpuid();
 181     __ orl(rax, rax);
 182     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 183                                         // value of at least 1, we give up and
 184                                         // assume a 486
 185     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 186     __ movl(Address(rsi, 0), rax);
 187     __ movl(Address(rsi, 4), rbx);
 188     __ movl(Address(rsi, 8), rcx);
 189     __ movl(Address(rsi,12), rdx);
 190 
 191     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 192     __ jccb(Assembler::belowEqual, std_cpuid4);
 193 
 194     //
 195     // cpuid(0xB) Processor Topology
 196     //
 197     __ movl(rax, 0xb);
 198     __ xorl(rcx, rcx);   // Threads level
 199     __ cpuid();
 200 
 201     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 202     __ movl(Address(rsi, 0), rax);
 203     __ movl(Address(rsi, 4), rbx);
 204     __ movl(Address(rsi, 8), rcx);
 205     __ movl(Address(rsi,12), rdx);
 206 
 207     __ movl(rax, 0xb);
 208     __ movl(rcx, 1);     // Cores level
 209     __ cpuid();
 210     __ push(rax);
 211     __ andl(rax, 0x1f);  // Determine if valid topology level
 212     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 213     __ andl(rax, 0xffff);
 214     __ pop(rax);
 215     __ jccb(Assembler::equal, std_cpuid4);
 216 
 217     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 218     __ movl(Address(rsi, 0), rax);
 219     __ movl(Address(rsi, 4), rbx);
 220     __ movl(Address(rsi, 8), rcx);
 221     __ movl(Address(rsi,12), rdx);
 222 
 223     __ movl(rax, 0xb);
 224     __ movl(rcx, 2);     // Packages level
 225     __ cpuid();
 226     __ push(rax);
 227     __ andl(rax, 0x1f);  // Determine if valid topology level
 228     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 229     __ andl(rax, 0xffff);
 230     __ pop(rax);
 231     __ jccb(Assembler::equal, std_cpuid4);
 232 
 233     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 234     __ movl(Address(rsi, 0), rax);
 235     __ movl(Address(rsi, 4), rbx);
 236     __ movl(Address(rsi, 8), rcx);
 237     __ movl(Address(rsi,12), rdx);
 238 
 239     //
 240     // cpuid(0x4) Deterministic cache params
 241     //
 242     __ bind(std_cpuid4);
 243     __ movl(rax, 4);
 244     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 245     __ jccb(Assembler::greater, std_cpuid1);
 246 
 247     __ xorl(rcx, rcx);   // L1 cache
 248     __ cpuid();
 249     __ push(rax);
 250     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 251     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 252     __ pop(rax);
 253     __ jccb(Assembler::equal, std_cpuid1);
 254 
 255     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 256     __ movl(Address(rsi, 0), rax);
 257     __ movl(Address(rsi, 4), rbx);
 258     __ movl(Address(rsi, 8), rcx);
 259     __ movl(Address(rsi,12), rdx);
 260 
 261     //
 262     // Standard cpuid(0x1)
 263     //
 264     __ bind(std_cpuid1);
 265     __ movl(rax, 1);
 266     __ cpuid();
 267     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 268     __ movl(Address(rsi, 0), rax);
 269     __ movl(Address(rsi, 4), rbx);
 270     __ movl(Address(rsi, 8), rcx);
 271     __ movl(Address(rsi,12), rdx);
 272 
 273     //
 274     // Check if OS has enabled XGETBV instruction to access XCR0
 275     // (OSXSAVE feature flag) and CPU supports AVX
 276     //
 277     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 278     __ cmpl(rcx, 0x18000000);
 279     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 280 
 281     //
 282     // XCR0, XFEATURE_ENABLED_MASK register
 283     //
 284     __ xorl(rcx, rcx);   // zero for XCR0 register
 285     __ xgetbv();
 286     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 287     __ movl(Address(rsi, 0), rax);
 288     __ movl(Address(rsi, 4), rdx);
 289 
 290     //
 291     // cpuid(0x7) Structured Extended Features
 292     //
 293     __ bind(sef_cpuid);
 294     __ movl(rax, 7);
 295     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 296     __ jccb(Assembler::greater, ext_cpuid);
 297     // ECX = 0
 298     __ xorl(rcx, rcx);
 299     __ cpuid();
 300     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 301     __ movl(Address(rsi, 0), rax);
 302     __ movl(Address(rsi, 4), rbx);
 303     __ movl(Address(rsi, 8), rcx);
 304     __ movl(Address(rsi, 12), rdx);
 305 
 306     // ECX = 1
 307     __ movl(rax, 7);
 308     __ movl(rcx, 1);
 309     __ cpuid();
 310     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_ecx1_offset())));
 311     __ movl(Address(rsi, 0), rax);
 312 
 313     //
 314     // Extended cpuid(0x80000000)
 315     //
 316     __ bind(ext_cpuid);
 317     __ movl(rax, 0x80000000);
 318     __ cpuid();
 319     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 320     __ jcc(Assembler::belowEqual, done);
 321     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 322     __ jcc(Assembler::belowEqual, ext_cpuid1);
 323     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 324     __ jccb(Assembler::belowEqual, ext_cpuid5);
 325     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 326     __ jccb(Assembler::belowEqual, ext_cpuid7);
 327     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 328     __ jccb(Assembler::belowEqual, ext_cpuid8);
 329     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 330     __ jccb(Assembler::below, ext_cpuid8);
 331     //
 332     // Extended cpuid(0x8000001E)
 333     //
 334     __ movl(rax, 0x8000001E);
 335     __ cpuid();
 336     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 337     __ movl(Address(rsi, 0), rax);
 338     __ movl(Address(rsi, 4), rbx);
 339     __ movl(Address(rsi, 8), rcx);
 340     __ movl(Address(rsi,12), rdx);
 341 
 342     //
 343     // Extended cpuid(0x80000008)
 344     //
 345     __ bind(ext_cpuid8);
 346     __ movl(rax, 0x80000008);
 347     __ cpuid();
 348     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 349     __ movl(Address(rsi, 0), rax);
 350     __ movl(Address(rsi, 4), rbx);
 351     __ movl(Address(rsi, 8), rcx);
 352     __ movl(Address(rsi,12), rdx);
 353 
 354     //
 355     // Extended cpuid(0x80000007)
 356     //
 357     __ bind(ext_cpuid7);
 358     __ movl(rax, 0x80000007);
 359     __ cpuid();
 360     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 361     __ movl(Address(rsi, 0), rax);
 362     __ movl(Address(rsi, 4), rbx);
 363     __ movl(Address(rsi, 8), rcx);
 364     __ movl(Address(rsi,12), rdx);
 365 
 366     //
 367     // Extended cpuid(0x80000005)
 368     //
 369     __ bind(ext_cpuid5);
 370     __ movl(rax, 0x80000005);
 371     __ cpuid();
 372     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 373     __ movl(Address(rsi, 0), rax);
 374     __ movl(Address(rsi, 4), rbx);
 375     __ movl(Address(rsi, 8), rcx);
 376     __ movl(Address(rsi,12), rdx);
 377 
 378     //
 379     // Extended cpuid(0x80000001)
 380     //
 381     __ bind(ext_cpuid1);
 382     __ movl(rax, 0x80000001);
 383     __ cpuid();
 384     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 385     __ movl(Address(rsi, 0), rax);
 386     __ movl(Address(rsi, 4), rbx);
 387     __ movl(Address(rsi, 8), rcx);
 388     __ movl(Address(rsi,12), rdx);
 389 
 390     //
 391     // Check if OS has enabled XGETBV instruction to access XCR0
 392     // (OSXSAVE feature flag) and CPU supports AVX
 393     //
 394     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 395     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 396     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 397     __ cmpl(rcx, 0x18000000);
 398     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 399 
 400     __ movl(rax, 0x6);
 401     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 402     __ cmpl(rax, 0x6);
 403     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 404 
 405     // we need to bridge farther than imm8, so we use this island as a thunk
 406     __ bind(done);
 407     __ jmp(wrapup);
 408 
 409     __ bind(start_simd_check);
 410     //
 411     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 412     // registers are not restored after a signal processing.
 413     // Generate SEGV here (reference through null)
 414     // and check upper YMM/ZMM bits after it.
 415     //
 416     int saved_useavx = UseAVX;
 417     int saved_usesse = UseSSE;
 418 
 419     // If UseAVX is uninitialized or is set by the user to include EVEX
 420     if (use_evex) {
 421       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 422       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 423       __ movl(rax, 0x10000);
 424       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 425       __ cmpl(rax, 0x10000);
 426       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 427       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 428       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 429       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 430       __ movl(rax, 0xE0);
 431       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 432       __ cmpl(rax, 0xE0);
 433       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 434 
 435       if (FLAG_IS_DEFAULT(UseAVX)) {
 436         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 437         __ movl(rax, Address(rsi, 0));
 438         __ cmpl(rax, 0x50654);              // If it is Skylake
 439         __ jcc(Assembler::equal, legacy_setup);
 440       }
 441       // EVEX setup: run in lowest evex mode
 442       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 443       UseAVX = 3;
 444       UseSSE = 2;
 445 #ifdef _WINDOWS
 446       // xmm5-xmm15 are not preserved by caller on windows
 447       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 448       __ subptr(rsp, 64);
 449       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 450 #ifdef _LP64
 451       __ subptr(rsp, 64);
 452       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 453       __ subptr(rsp, 64);
 454       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 455 #endif // _LP64
 456 #endif // _WINDOWS
 457 
 458       // load value into all 64 bytes of zmm7 register
 459       __ movl(rcx, VM_Version::ymm_test_value());
 460       __ movdl(xmm0, rcx);
 461       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 462       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 463 #ifdef _LP64
 464       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 465       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 466 #endif
 467       VM_Version::clean_cpuFeatures();
 468       __ jmp(save_restore_except);
 469     }
 470 
 471     __ bind(legacy_setup);
 472     // AVX setup
 473     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 474     UseAVX = 1;
 475     UseSSE = 2;
 476 #ifdef _WINDOWS
 477     __ subptr(rsp, 32);
 478     __ vmovdqu(Address(rsp, 0), xmm7);
 479 #ifdef _LP64
 480     __ subptr(rsp, 32);
 481     __ vmovdqu(Address(rsp, 0), xmm8);
 482     __ subptr(rsp, 32);
 483     __ vmovdqu(Address(rsp, 0), xmm15);
 484 #endif // _LP64
 485 #endif // _WINDOWS
 486 
 487     // load value into all 32 bytes of ymm7 register
 488     __ movl(rcx, VM_Version::ymm_test_value());
 489 
 490     __ movdl(xmm0, rcx);
 491     __ pshufd(xmm0, xmm0, 0x00);
 492     __ vinsertf128_high(xmm0, xmm0);
 493     __ vmovdqu(xmm7, xmm0);
 494 #ifdef _LP64
 495     __ vmovdqu(xmm8, xmm0);
 496     __ vmovdqu(xmm15, xmm0);
 497 #endif
 498     VM_Version::clean_cpuFeatures();
 499 
 500     __ bind(save_restore_except);
 501     __ xorl(rsi, rsi);
 502     VM_Version::set_cpuinfo_segv_addr(__ pc());
 503     // Generate SEGV
 504     __ movl(rax, Address(rsi, 0));
 505 
 506     VM_Version::set_cpuinfo_cont_addr(__ pc());
 507     // Returns here after signal. Save xmm0 to check it later.
 508 
 509     // If UseAVX is uninitialized or is set by the user to include EVEX
 510     if (use_evex) {
 511       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 512       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 513       __ movl(rax, 0x10000);
 514       __ andl(rax, Address(rsi, 4));
 515       __ cmpl(rax, 0x10000);
 516       __ jcc(Assembler::notEqual, legacy_save_restore);
 517       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 518       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 519       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 520       __ movl(rax, 0xE0);
 521       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 522       __ cmpl(rax, 0xE0);
 523       __ jcc(Assembler::notEqual, legacy_save_restore);
 524 
 525       if (FLAG_IS_DEFAULT(UseAVX)) {
 526         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 527         __ movl(rax, Address(rsi, 0));
 528         __ cmpl(rax, 0x50654);              // If it is Skylake
 529         __ jcc(Assembler::equal, legacy_save_restore);
 530       }
 531       // EVEX check: run in lowest evex mode
 532       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 533       UseAVX = 3;
 534       UseSSE = 2;
 535       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 536       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 537       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 538 #ifdef _LP64
 539       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 540       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 541 #endif
 542 
 543 #ifdef _WINDOWS
 544 #ifdef _LP64
 545       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 546       __ addptr(rsp, 64);
 547       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 548       __ addptr(rsp, 64);
 549 #endif // _LP64
 550       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 551       __ addptr(rsp, 64);
 552 #endif // _WINDOWS
 553       generate_vzeroupper(wrapup);
 554       VM_Version::clean_cpuFeatures();
 555       UseAVX = saved_useavx;
 556       UseSSE = saved_usesse;
 557       __ jmp(wrapup);
 558    }
 559 
 560     __ bind(legacy_save_restore);
 561     // AVX check
 562     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 563     UseAVX = 1;
 564     UseSSE = 2;
 565     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 566     __ vmovdqu(Address(rsi, 0), xmm0);
 567     __ vmovdqu(Address(rsi, 32), xmm7);
 568 #ifdef _LP64
 569     __ vmovdqu(Address(rsi, 64), xmm8);
 570     __ vmovdqu(Address(rsi, 96), xmm15);
 571 #endif
 572 
 573 #ifdef _WINDOWS
 574 #ifdef _LP64
 575     __ vmovdqu(xmm15, Address(rsp, 0));
 576     __ addptr(rsp, 32);
 577     __ vmovdqu(xmm8, Address(rsp, 0));
 578     __ addptr(rsp, 32);
 579 #endif // _LP64
 580     __ vmovdqu(xmm7, Address(rsp, 0));
 581     __ addptr(rsp, 32);
 582 #endif // _WINDOWS
 583     generate_vzeroupper(wrapup);
 584     VM_Version::clean_cpuFeatures();
 585     UseAVX = saved_useavx;
 586     UseSSE = saved_usesse;
 587 
 588     __ bind(wrapup);
 589     __ popf();
 590     __ pop(rsi);
 591     __ pop(rbx);
 592     __ pop(rbp);
 593     __ ret(0);
 594 
 595 #   undef __
 596 
 597     return start;
 598   };
 599   void generate_vzeroupper(Label& L_wrapup) {
 600 #   define __ _masm->
 601     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 602     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 603     __ jcc(Assembler::notEqual, L_wrapup);
 604     __ movl(rcx, 0x0FFF0FF0);
 605     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 606     __ andl(rcx, Address(rsi, 0));
 607     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 608     __ jcc(Assembler::equal, L_wrapup);
 609     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 610     __ jcc(Assembler::equal, L_wrapup);
 611     // vzeroupper() will use a pre-computed instruction sequence that we
 612     // can't compute until after we've determined CPU capabilities. Use
 613     // uncached variant here directly to be able to bootstrap correctly
 614     __ vzeroupper_uncached();
 615 #   undef __
 616   }
 617   address generate_detect_virt() {
 618     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 619 #   define __ _masm->
 620 
 621     address start = __ pc();
 622 
 623     // Evacuate callee-saved registers
 624     __ push(rbp);
 625     __ push(rbx);
 626     __ push(rsi); // for Windows
 627 
 628 #ifdef _LP64
 629     __ mov(rax, c_rarg0); // CPUID leaf
 630     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 631 #else
 632     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 633     __ movptr(rsi, Address(rsp, 20)); // register array address
 634 #endif
 635 
 636     __ cpuid();
 637 
 638     // Store result to register array
 639     __ movl(Address(rsi,  0), rax);
 640     __ movl(Address(rsi,  4), rbx);
 641     __ movl(Address(rsi,  8), rcx);
 642     __ movl(Address(rsi, 12), rdx);
 643 
 644     // Epilogue
 645     __ pop(rsi);
 646     __ pop(rbx);
 647     __ pop(rbp);
 648     __ ret(0);
 649 
 650 #   undef __
 651 
 652     return start;
 653   };
 654 
 655 
 656   address generate_getCPUIDBrandString(void) {
 657     // Flags to test CPU type.
 658     const uint32_t HS_EFL_AC           = 0x40000;
 659     const uint32_t HS_EFL_ID           = 0x200000;
 660     // Values for when we don't have a CPUID instruction.
 661     const int      CPU_FAMILY_SHIFT = 8;
 662     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 663     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 664 
 665     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 666 
 667     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 668 #   define __ _masm->
 669 
 670     address start = __ pc();
 671 
 672     //
 673     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 674     //
 675     // LP64: rcx and rdx are first and second argument registers on windows
 676 
 677     __ push(rbp);
 678 #ifdef _LP64
 679     __ mov(rbp, c_rarg0); // cpuid_info address
 680 #else
 681     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 682 #endif
 683     __ push(rbx);
 684     __ push(rsi);
 685     __ pushf();          // preserve rbx, and flags
 686     __ pop(rax);
 687     __ push(rax);
 688     __ mov(rcx, rax);
 689     //
 690     // if we are unable to change the AC flag, we have a 386
 691     //
 692     __ xorl(rax, HS_EFL_AC);
 693     __ push(rax);
 694     __ popf();
 695     __ pushf();
 696     __ pop(rax);
 697     __ cmpptr(rax, rcx);
 698     __ jccb(Assembler::notEqual, detect_486);
 699 
 700     __ movl(rax, CPU_FAMILY_386);
 701     __ jmp(done);
 702 
 703     //
 704     // If we are unable to change the ID flag, we have a 486 which does
 705     // not support the "cpuid" instruction.
 706     //
 707     __ bind(detect_486);
 708     __ mov(rax, rcx);
 709     __ xorl(rax, HS_EFL_ID);
 710     __ push(rax);
 711     __ popf();
 712     __ pushf();
 713     __ pop(rax);
 714     __ cmpptr(rcx, rax);
 715     __ jccb(Assembler::notEqual, detect_586);
 716 
 717     __ bind(cpu486);
 718     __ movl(rax, CPU_FAMILY_486);
 719     __ jmp(done);
 720 
 721     //
 722     // At this point, we have a chip which supports the "cpuid" instruction
 723     //
 724     __ bind(detect_586);
 725     __ xorl(rax, rax);
 726     __ cpuid();
 727     __ orl(rax, rax);
 728     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 729                                         // value of at least 1, we give up and
 730                                         // assume a 486
 731 
 732     //
 733     // Extended cpuid(0x80000000) for processor brand string detection
 734     //
 735     __ bind(ext_cpuid);
 736     __ movl(rax, CPUID_EXTENDED_FN);
 737     __ cpuid();
 738     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 739     __ jcc(Assembler::below, done);
 740 
 741     //
 742     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 743     //
 744     __ movl(rax, CPUID_EXTENDED_FN_2);
 745     __ cpuid();
 746     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 747     __ movl(Address(rsi, 0), rax);
 748     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 749     __ movl(Address(rsi, 0), rbx);
 750     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 751     __ movl(Address(rsi, 0), rcx);
 752     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 753     __ movl(Address(rsi,0), rdx);
 754 
 755     //
 756     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 757     //
 758     __ movl(rax, CPUID_EXTENDED_FN_3);
 759     __ cpuid();
 760     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 761     __ movl(Address(rsi, 0), rax);
 762     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 763     __ movl(Address(rsi, 0), rbx);
 764     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 765     __ movl(Address(rsi, 0), rcx);
 766     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 767     __ movl(Address(rsi,0), rdx);
 768 
 769     //
 770     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 771     //
 772     __ movl(rax, CPUID_EXTENDED_FN_4);
 773     __ cpuid();
 774     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 775     __ movl(Address(rsi, 0), rax);
 776     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 777     __ movl(Address(rsi, 0), rbx);
 778     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 779     __ movl(Address(rsi, 0), rcx);
 780     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 781     __ movl(Address(rsi,0), rdx);
 782 
 783     //
 784     // return
 785     //
 786     __ bind(done);
 787     __ popf();
 788     __ pop(rsi);
 789     __ pop(rbx);
 790     __ pop(rbp);
 791     __ ret(0);
 792 
 793 #   undef __
 794 
 795     return start;
 796   };
 797 };
 798 
 799 void VM_Version::get_processor_features() {
 800 
 801   _cpu = 4; // 486 by default
 802   _model = 0;
 803   _stepping = 0;
 804   _features = 0;
 805   _logical_processors_per_package = 1;
 806   // i486 internal cache is both I&D and has a 16-byte line size
 807   _L1_data_cache_line_size = 16;
 808 
 809   // Get raw processor info
 810 
 811   get_cpu_info_stub(&_cpuid_info);
 812 
 813   assert_is_initialized();
 814   _cpu = extended_cpu_family();
 815   _model = extended_cpu_model();
 816   _stepping = cpu_stepping();
 817 
 818   if (cpu_family() > 4) { // it supports CPUID
 819     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 820     _cpu_features = _features;   // Preserve features
 821     // Logical processors are only available on P4s and above,
 822     // and only if hyperthreading is available.
 823     _logical_processors_per_package = logical_processor_count();
 824     _L1_data_cache_line_size = L1_line_size();
 825   }
 826 
 827   // xchg and xadd instructions
 828   _supports_atomic_getset4 = true;
 829   _supports_atomic_getadd4 = true;
 830   LP64_ONLY(_supports_atomic_getset8 = true);
 831   LP64_ONLY(_supports_atomic_getadd8 = true);
 832 
 833 #ifdef _LP64
 834   // OS should support SSE for x64 and hardware should support at least SSE2.
 835   if (!VM_Version::supports_sse2()) {
 836     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 837   }
 838   // in 64 bit the use of SSE2 is the minimum
 839   if (UseSSE < 2) UseSSE = 2;
 840 #endif
 841 
 842 #ifdef AMD64
 843   // flush_icache_stub have to be generated first.
 844   // That is why Icache line size is hard coded in ICache class,
 845   // see icache_x86.hpp. It is also the reason why we can't use
 846   // clflush instruction in 32-bit VM since it could be running
 847   // on CPU which does not support it.
 848   //
 849   // The only thing we can do is to verify that flushed
 850   // ICache::line_size has correct value.
 851   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 852   // clflush_size is size in quadwords (8 bytes).
 853   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 854 #endif
 855 
 856 #ifdef _LP64
 857   // assigning this field effectively enables Unsafe.writebackMemory()
 858   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 859   // that is only implemented on x86_64 and only if the OS plays ball
 860   if (os::supports_map_sync()) {
 861     // publish data cache line flush size to generic field, otherwise
 862     // let if default to zero thereby disabling writeback
 863     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 864   }
 865 #endif
 866 
 867   // Check if processor has Intel Ecore
 868   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 869     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
 870     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 871   }
 872 
 873   if (UseSSE < 4) {
 874     _features &= ~CPU_SSE4_1;
 875     _features &= ~CPU_SSE4_2;
 876   }
 877 
 878   if (UseSSE < 3) {
 879     _features &= ~CPU_SSE3;
 880     _features &= ~CPU_SSSE3;
 881     _features &= ~CPU_SSE4A;
 882   }
 883 
 884   if (UseSSE < 2)
 885     _features &= ~CPU_SSE2;
 886 
 887   if (UseSSE < 1)
 888     _features &= ~CPU_SSE;
 889 
 890   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 891   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 892     UseAVX = 0;
 893   }
 894 
 895   // UseSSE is set to the smaller of what hardware supports and what
 896   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 897   // older Pentiums which do not support it.
 898   int use_sse_limit = 0;
 899   if (UseSSE > 0) {
 900     if (UseSSE > 3 && supports_sse4_1()) {
 901       use_sse_limit = 4;
 902     } else if (UseSSE > 2 && supports_sse3()) {
 903       use_sse_limit = 3;
 904     } else if (UseSSE > 1 && supports_sse2()) {
 905       use_sse_limit = 2;
 906     } else if (UseSSE > 0 && supports_sse()) {
 907       use_sse_limit = 1;
 908     } else {
 909       use_sse_limit = 0;
 910     }
 911   }
 912   if (FLAG_IS_DEFAULT(UseSSE)) {
 913     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 914   } else if (UseSSE > use_sse_limit) {
 915     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 916     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 917   }
 918 
 919   // first try initial setting and detect what we can support
 920   int use_avx_limit = 0;
 921   if (UseAVX > 0) {
 922     if (UseSSE < 4) {
 923       // Don't use AVX if SSE is unavailable or has been disabled.
 924       use_avx_limit = 0;
 925     } else if (UseAVX > 2 && supports_evex()) {
 926       use_avx_limit = 3;
 927     } else if (UseAVX > 1 && supports_avx2()) {
 928       use_avx_limit = 2;
 929     } else if (UseAVX > 0 && supports_avx()) {
 930       use_avx_limit = 1;
 931     } else {
 932       use_avx_limit = 0;
 933     }
 934   }
 935   if (FLAG_IS_DEFAULT(UseAVX)) {
 936     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 937     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 938       FLAG_SET_DEFAULT(UseAVX, 2);
 939     } else {
 940       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 941     }
 942   }
 943   if (UseAVX > use_avx_limit) {
 944     if (UseSSE < 4) {
 945       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 946     } else {
 947       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 948     }
 949     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 950   }
 951 
 952   if (UseAVX < 3) {
 953     _features &= ~CPU_AVX512F;
 954     _features &= ~CPU_AVX512DQ;
 955     _features &= ~CPU_AVX512CD;
 956     _features &= ~CPU_AVX512BW;
 957     _features &= ~CPU_AVX512VL;
 958     _features &= ~CPU_AVX512_VPOPCNTDQ;
 959     _features &= ~CPU_AVX512_VPCLMULQDQ;
 960     _features &= ~CPU_AVX512_VAES;
 961     _features &= ~CPU_AVX512_VNNI;
 962     _features &= ~CPU_AVX512_VBMI;
 963     _features &= ~CPU_AVX512_VBMI2;
 964     _features &= ~CPU_AVX512_BITALG;
 965     _features &= ~CPU_AVX512_IFMA;
 966   }
 967 
 968   if (UseAVX < 2) {
 969     _features &= ~CPU_AVX2;
 970     _features &= ~CPU_AVX_IFMA;
 971   }
 972 
 973   if (UseAVX < 1) {
 974     _features &= ~CPU_AVX;
 975     _features &= ~CPU_VZEROUPPER;
 976     _features &= ~CPU_F16C;
 977   }
 978 
 979   if (logical_processors_per_package() == 1) {
 980     // HT processor could be installed on a system which doesn't support HT.
 981     _features &= ~CPU_HT;
 982   }
 983 
 984   if (is_intel()) { // Intel cpus specific settings
 985     if (is_knights_family()) {
 986       _features &= ~CPU_VZEROUPPER;
 987       _features &= ~CPU_AVX512BW;
 988       _features &= ~CPU_AVX512VL;
 989       _features &= ~CPU_AVX512DQ;
 990       _features &= ~CPU_AVX512_VNNI;
 991       _features &= ~CPU_AVX512_VAES;
 992       _features &= ~CPU_AVX512_VPOPCNTDQ;
 993       _features &= ~CPU_AVX512_VPCLMULQDQ;
 994       _features &= ~CPU_AVX512_VBMI;
 995       _features &= ~CPU_AVX512_VBMI2;
 996       _features &= ~CPU_CLWB;
 997       _features &= ~CPU_FLUSHOPT;
 998       _features &= ~CPU_GFNI;
 999       _features &= ~CPU_AVX512_BITALG;
1000       _features &= ~CPU_AVX512_IFMA;
1001       _features &= ~CPU_AVX_IFMA;
1002     }
1003   }
1004 
1005   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1006     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1007   } else {
1008     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1009   }
1010 
1011   char buf[1024];
1012   int res = jio_snprintf(
1013               buf, sizeof(buf),
1014               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1015               cores_per_cpu(), threads_per_core(),
1016               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1017   assert(res > 0, "not enough temporary space allocated");
1018   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1019 
1020   _features_string = os::strdup(buf);
1021 
1022   // Use AES instructions if available.
1023   if (supports_aes()) {
1024     if (FLAG_IS_DEFAULT(UseAES)) {
1025       FLAG_SET_DEFAULT(UseAES, true);
1026     }
1027     if (!UseAES) {
1028       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1029         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1030       }
1031       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1032     } else {
1033       if (UseSSE > 2) {
1034         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1035           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1036         }
1037       } else {
1038         // The AES intrinsic stubs require AES instruction support (of course)
1039         // but also require sse3 mode or higher for instructions it use.
1040         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1041           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1042         }
1043         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1044       }
1045 
1046       // --AES-CTR begins--
1047       if (!UseAESIntrinsics) {
1048         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1049           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1050           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1051         }
1052       } else {
1053         if (supports_sse4_1()) {
1054           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1055             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1056           }
1057         } else {
1058            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1059            // but also require sse4.1 mode or higher for instructions it use.
1060           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1061              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1062            }
1063            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1064         }
1065       }
1066       // --AES-CTR ends--
1067     }
1068   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1069     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1070       warning("AES instructions are not available on this CPU");
1071       FLAG_SET_DEFAULT(UseAES, false);
1072     }
1073     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1074       warning("AES intrinsics are not available on this CPU");
1075       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1076     }
1077     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1078       warning("AES-CTR intrinsics are not available on this CPU");
1079       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1080     }
1081   }
1082 
1083   // Use CLMUL instructions if available.
1084   if (supports_clmul()) {
1085     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1086       UseCLMUL = true;
1087     }
1088   } else if (UseCLMUL) {
1089     if (!FLAG_IS_DEFAULT(UseCLMUL))
1090       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1091     FLAG_SET_DEFAULT(UseCLMUL, false);
1092   }
1093 
1094   if (UseCLMUL && (UseSSE > 2)) {
1095     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1096       UseCRC32Intrinsics = true;
1097     }
1098   } else if (UseCRC32Intrinsics) {
1099     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1100       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1101     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1102   }
1103 
1104 #ifdef _LP64
1105   if (supports_avx2()) {
1106     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1107       UseAdler32Intrinsics = true;
1108     }
1109   } else if (UseAdler32Intrinsics) {
1110     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1111       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1112     }
1113     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1114   }
1115 #else
1116   if (UseAdler32Intrinsics) {
1117     warning("Adler32Intrinsics not available on this CPU.");
1118     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1119   }
1120 #endif
1121 
1122   if (supports_sse4_2() && supports_clmul()) {
1123     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1124       UseCRC32CIntrinsics = true;
1125     }
1126   } else if (UseCRC32CIntrinsics) {
1127     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1128       warning("CRC32C intrinsics are not available on this CPU");
1129     }
1130     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1131   }
1132 
1133   // GHASH/GCM intrinsics
1134   if (UseCLMUL && (UseSSE > 2)) {
1135     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1136       UseGHASHIntrinsics = true;
1137     }
1138   } else if (UseGHASHIntrinsics) {
1139     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1140       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1141     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1142   }
1143 
1144 #ifdef _LP64
1145   // ChaCha20 Intrinsics
1146   // As long as the system supports AVX as a baseline we can do a
1147   // SIMD-enabled block function.  StubGenerator makes the determination
1148   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1149   // version.
1150   if (UseAVX >= 1) {
1151       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1152           UseChaCha20Intrinsics = true;
1153       }
1154   } else if (UseChaCha20Intrinsics) {
1155       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1156           warning("ChaCha20 intrinsic requires AVX instructions");
1157       }
1158       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1159   }
1160 #else
1161   // No support currently for ChaCha20 intrinsics on 32-bit platforms
1162   if (UseChaCha20Intrinsics) {
1163       warning("ChaCha20 intrinsics are not available on this CPU.");
1164       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1165   }
1166 #endif // _LP64
1167 
1168   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1169   if (UseAVX >= 2) {
1170     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1171       UseBASE64Intrinsics = true;
1172     }
1173   } else if (UseBASE64Intrinsics) {
1174      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1175       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1176     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1177   }
1178 
1179   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1180     if (FLAG_IS_DEFAULT(UseFMA)) {
1181       UseFMA = true;
1182     }
1183   } else if (UseFMA) {
1184     warning("FMA instructions are not available on this CPU");
1185     FLAG_SET_DEFAULT(UseFMA, false);
1186   }
1187 
1188   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1189     UseMD5Intrinsics = true;
1190   }
1191 
1192   if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
1193     if (FLAG_IS_DEFAULT(UseSHA)) {
1194       UseSHA = true;
1195     }
1196   } else if (UseSHA) {
1197     warning("SHA instructions are not available on this CPU");
1198     FLAG_SET_DEFAULT(UseSHA, false);
1199   }
1200 
1201   if (supports_sha() && supports_sse4_1() && UseSHA) {
1202     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1203       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1204     }
1205   } else if (UseSHA1Intrinsics) {
1206     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1207     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1208   }
1209 
1210   if (supports_sse4_1() && UseSHA) {
1211     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1212       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1213     }
1214   } else if (UseSHA256Intrinsics) {
1215     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1216     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1217   }
1218 
1219 #ifdef _LP64
1220   // These are only supported on 64-bit
1221   if (UseSHA && supports_avx2() && supports_bmi2()) {
1222     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1223       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1224     }
1225   } else
1226 #endif
1227   if (UseSHA512Intrinsics) {
1228     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1229     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1230   }
1231 
1232   if (UseSHA3Intrinsics) {
1233     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1234     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1235   }
1236 
1237   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1238     FLAG_SET_DEFAULT(UseSHA, false);
1239   }
1240 
1241   if (!supports_rtm() && UseRTMLocking) {
1242     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1243   }
1244 
1245 #if INCLUDE_RTM_OPT
1246   if (UseRTMLocking) {
1247     if (!CompilerConfig::is_c2_enabled()) {
1248       // Only C2 does RTM locking optimization.
1249       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1250     }
1251     if (is_intel_family_core()) {
1252       if ((_model == CPU_MODEL_HASWELL_E3) ||
1253           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1254           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1255         // currently a collision between SKL and HSW_E3
1256         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1257           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1258                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1259         } else {
1260           warning("UseRTMLocking is only available as experimental option on this platform.");
1261         }
1262       }
1263     }
1264     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1265       // RTM locking should be used only for applications with
1266       // high lock contention. For now we do not use it by default.
1267       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1268     }
1269   } else { // !UseRTMLocking
1270     if (UseRTMForStackLocks) {
1271       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1272         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1273       }
1274       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1275     }
1276     if (UseRTMDeopt) {
1277       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1278     }
1279     if (PrintPreciseRTMLockingStatistics) {
1280       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1281     }
1282   }
1283 #else
1284   if (UseRTMLocking) {
1285     // Only C2 does RTM locking optimization.
1286     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1287   }
1288 #endif
1289 
1290 #ifdef COMPILER2
1291   if (UseFPUForSpilling) {
1292     if (UseSSE < 2) {
1293       // Only supported with SSE2+
1294       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1295     }
1296   }
1297 #endif
1298 
1299 #if COMPILER2_OR_JVMCI
1300   int max_vector_size = 0;
1301   if (UseSSE < 2) {
1302     // Vectors (in XMM) are only supported with SSE2+
1303     // SSE is always 2 on x64.
1304     max_vector_size = 0;
1305   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1306     // 16 byte vectors (in XMM) are supported with SSE2+
1307     max_vector_size = 16;
1308   } else if (UseAVX == 1 || UseAVX == 2) {
1309     // 32 bytes vectors (in YMM) are only supported with AVX+
1310     max_vector_size = 32;
1311   } else if (UseAVX > 2) {
1312     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1313     max_vector_size = 64;
1314   }
1315 
1316 #ifdef _LP64
1317   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1318 #else
1319   int min_vector_size = 0;
1320 #endif
1321 
1322   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1323     if (MaxVectorSize < min_vector_size) {
1324       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1325       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1326     }
1327     if (MaxVectorSize > max_vector_size) {
1328       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1329       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1330     }
1331     if (!is_power_of_2(MaxVectorSize)) {
1332       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1333       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1334     }
1335   } else {
1336     // If default, use highest supported configuration
1337     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1338   }
1339 
1340 #if defined(COMPILER2) && defined(ASSERT)
1341   if (MaxVectorSize > 0) {
1342     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1343       tty->print_cr("State of YMM registers after signal handle:");
1344       int nreg = 2 LP64_ONLY(+2);
1345       const char* ymm_name[4] = {"0", "7", "8", "15"};
1346       for (int i = 0; i < nreg; i++) {
1347         tty->print("YMM%s:", ymm_name[i]);
1348         for (int j = 7; j >=0; j--) {
1349           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1350         }
1351         tty->cr();
1352       }
1353     }
1354   }
1355 #endif // COMPILER2 && ASSERT
1356 
1357 #ifdef _LP64
1358   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1359     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1360       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1361     }
1362   } else
1363 #endif
1364   if (UsePoly1305Intrinsics) {
1365     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1366     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1367   }
1368 
1369 #ifdef _LP64
1370   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1371     UseMultiplyToLenIntrinsic = true;
1372   }
1373   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1374     UseSquareToLenIntrinsic = true;
1375   }
1376   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1377     UseMulAddIntrinsic = true;
1378   }
1379   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1380     UseMontgomeryMultiplyIntrinsic = true;
1381   }
1382   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1383     UseMontgomerySquareIntrinsic = true;
1384   }
1385 #else
1386   if (UseMultiplyToLenIntrinsic) {
1387     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1388       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1389     }
1390     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1391   }
1392   if (UseMontgomeryMultiplyIntrinsic) {
1393     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1394       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1395     }
1396     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1397   }
1398   if (UseMontgomerySquareIntrinsic) {
1399     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1400       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1401     }
1402     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1403   }
1404   if (UseSquareToLenIntrinsic) {
1405     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1406       warning("squareToLen intrinsic is not available in 32-bit VM");
1407     }
1408     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1409   }
1410   if (UseMulAddIntrinsic) {
1411     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1412       warning("mulAdd intrinsic is not available in 32-bit VM");
1413     }
1414     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1415   }
1416 #endif // _LP64
1417 #endif // COMPILER2_OR_JVMCI
1418 
1419   // On new cpus instructions which update whole XMM register should be used
1420   // to prevent partial register stall due to dependencies on high half.
1421   //
1422   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1423   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1424   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1425   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1426 
1427 
1428   if (is_zx()) { // ZX cpus specific settings
1429     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1430       UseStoreImmI16 = false; // don't use it on ZX cpus
1431     }
1432     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1433       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1434         // Use it on all ZX cpus
1435         UseAddressNop = true;
1436       }
1437     }
1438     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1439       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1440     }
1441     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1442       if (supports_sse3()) {
1443         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1444       } else {
1445         UseXmmRegToRegMoveAll = false;
1446       }
1447     }
1448     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1449 #ifdef COMPILER2
1450       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1451         // For new ZX cpus do the next optimization:
1452         // don't align the beginning of a loop if there are enough instructions
1453         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1454         // in current fetch line (OptoLoopAlignment) or the padding
1455         // is big (> MaxLoopPad).
1456         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1457         // generated NOP instructions. 11 is the largest size of one
1458         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1459         MaxLoopPad = 11;
1460       }
1461 #endif // COMPILER2
1462       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1463         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1464       }
1465       if (supports_sse4_2()) { // new ZX cpus
1466         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1467           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1468         }
1469       }
1470       if (supports_sse4_2()) {
1471         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1472           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1473         }
1474       } else {
1475         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1476           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1477         }
1478         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1479       }
1480     }
1481 
1482     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1483       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1484     }
1485   }
1486 
1487   if (is_amd_family()) { // AMD cpus specific settings
1488     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1489       // Use it on new AMD cpus starting from Opteron.
1490       UseAddressNop = true;
1491     }
1492     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1493       // Use it on new AMD cpus starting from Opteron.
1494       UseNewLongLShift = true;
1495     }
1496     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1497       if (supports_sse4a()) {
1498         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1499       } else {
1500         UseXmmLoadAndClearUpper = false;
1501       }
1502     }
1503     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1504       if (supports_sse4a()) {
1505         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1506       } else {
1507         UseXmmRegToRegMoveAll = false;
1508       }
1509     }
1510     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1511       if (supports_sse4a()) {
1512         UseXmmI2F = true;
1513       } else {
1514         UseXmmI2F = false;
1515       }
1516     }
1517     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1518       if (supports_sse4a()) {
1519         UseXmmI2D = true;
1520       } else {
1521         UseXmmI2D = false;
1522       }
1523     }
1524     if (supports_sse4_2()) {
1525       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1526         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1527       }
1528     } else {
1529       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1530         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1531       }
1532       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1533     }
1534 
1535     // some defaults for AMD family 15h
1536     if (cpu_family() == 0x15) {
1537       // On family 15h processors default is no sw prefetch
1538       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1539         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1540       }
1541       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1542       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1543         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1544       }
1545       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1546       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1547         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1548       }
1549       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1550         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1551       }
1552     }
1553 
1554 #ifdef COMPILER2
1555     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1556       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1557       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1558     }
1559 #endif // COMPILER2
1560 
1561     // Some defaults for AMD family >= 17h && Hygon family 18h
1562     if (cpu_family() >= 0x17) {
1563       // On family >=17h processors use XMM and UnalignedLoadStores
1564       // for Array Copy
1565       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1566         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1567       }
1568       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1569         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1570       }
1571 #ifdef COMPILER2
1572       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1573         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1574       }
1575 #endif
1576     }
1577   }
1578 
1579   if (is_intel()) { // Intel cpus specific settings
1580     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1581       UseStoreImmI16 = false; // don't use it on Intel cpus
1582     }
1583     if (cpu_family() == 6 || cpu_family() == 15) {
1584       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1585         // Use it on all Intel cpus starting from PentiumPro
1586         UseAddressNop = true;
1587       }
1588     }
1589     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1590       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1591     }
1592     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1593       if (supports_sse3()) {
1594         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1595       } else {
1596         UseXmmRegToRegMoveAll = false;
1597       }
1598     }
1599     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1600 #ifdef COMPILER2
1601       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1602         // For new Intel cpus do the next optimization:
1603         // don't align the beginning of a loop if there are enough instructions
1604         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1605         // in current fetch line (OptoLoopAlignment) or the padding
1606         // is big (> MaxLoopPad).
1607         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1608         // generated NOP instructions. 11 is the largest size of one
1609         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1610         MaxLoopPad = 11;
1611       }
1612 #endif // COMPILER2
1613 
1614       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1615         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1616       }
1617       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1618         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1619           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1620         }
1621       }
1622       if (supports_sse4_2()) {
1623         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1624           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1625         }
1626       } else {
1627         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1628           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1629         }
1630         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1631       }
1632     }
1633     if (is_atom_family() || is_knights_family()) {
1634 #ifdef COMPILER2
1635       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1636         OptoScheduling = true;
1637       }
1638 #endif
1639       if (supports_sse4_2()) { // Silvermont
1640         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1641           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1642         }
1643       }
1644       if (FLAG_IS_DEFAULT(UseIncDec)) {
1645         FLAG_SET_DEFAULT(UseIncDec, false);
1646       }
1647     }
1648     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1649       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1650     }
1651 #ifdef COMPILER2
1652     if (UseAVX > 2) {
1653       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1654           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1655            ArrayOperationPartialInlineSize != 0 &&
1656            ArrayOperationPartialInlineSize != 16 &&
1657            ArrayOperationPartialInlineSize != 32 &&
1658            ArrayOperationPartialInlineSize != 64)) {
1659         int inline_size = 0;
1660         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1661           inline_size = 64;
1662         } else if (MaxVectorSize >= 32) {
1663           inline_size = 32;
1664         } else if (MaxVectorSize >= 16) {
1665           inline_size = 16;
1666         }
1667         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1668           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1669         }
1670         ArrayOperationPartialInlineSize = inline_size;
1671       }
1672 
1673       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1674         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1675         if (ArrayOperationPartialInlineSize) {
1676           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1677         } else {
1678           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1679         }
1680       }
1681     }
1682 #endif
1683   }
1684 
1685 #ifdef COMPILER2
1686   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1687     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1688       OptimizeFill = false;
1689     }
1690   }
1691 #endif
1692 
1693 #ifdef _LP64
1694   if (UseSSE42Intrinsics) {
1695     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1696       UseVectorizedMismatchIntrinsic = true;
1697     }
1698   } else if (UseVectorizedMismatchIntrinsic) {
1699     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1700       warning("vectorizedMismatch intrinsics are not available on this CPU");
1701     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1702   }
1703   if (UseAVX >= 2) {
1704     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1705   } else if (UseVectorizedHashCodeIntrinsic) {
1706     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1707       warning("vectorizedHashCode intrinsics are not available on this CPU");
1708     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1709   }
1710 #else
1711   if (UseVectorizedMismatchIntrinsic) {
1712     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1713       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1714     }
1715     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1716   }
1717   if (UseVectorizedHashCodeIntrinsic) {
1718     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1719       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1720     }
1721     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1722   }
1723 #endif // _LP64
1724 
1725   // Use count leading zeros count instruction if available.
1726   if (supports_lzcnt()) {
1727     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1728       UseCountLeadingZerosInstruction = true;
1729     }
1730    } else if (UseCountLeadingZerosInstruction) {
1731     warning("lzcnt instruction is not available on this CPU");
1732     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1733   }
1734 
1735   // Use count trailing zeros instruction if available
1736   if (supports_bmi1()) {
1737     // tzcnt does not require VEX prefix
1738     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1739       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1740         // Don't use tzcnt if BMI1 is switched off on command line.
1741         UseCountTrailingZerosInstruction = false;
1742       } else {
1743         UseCountTrailingZerosInstruction = true;
1744       }
1745     }
1746   } else if (UseCountTrailingZerosInstruction) {
1747     warning("tzcnt instruction is not available on this CPU");
1748     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1749   }
1750 
1751   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1752   // VEX prefix is generated only when AVX > 0.
1753   if (supports_bmi1() && supports_avx()) {
1754     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1755       UseBMI1Instructions = true;
1756     }
1757   } else if (UseBMI1Instructions) {
1758     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1759     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1760   }
1761 
1762   if (supports_bmi2() && supports_avx()) {
1763     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1764       UseBMI2Instructions = true;
1765     }
1766   } else if (UseBMI2Instructions) {
1767     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1768     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1769   }
1770 
1771   // Use population count instruction if available.
1772   if (supports_popcnt()) {
1773     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1774       UsePopCountInstruction = true;
1775     }
1776   } else if (UsePopCountInstruction) {
1777     warning("POPCNT instruction is not available on this CPU");
1778     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1779   }
1780 
1781   // Use fast-string operations if available.
1782   if (supports_erms()) {
1783     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1784       UseFastStosb = true;
1785     }
1786   } else if (UseFastStosb) {
1787     warning("fast-string operations are not available on this CPU");
1788     FLAG_SET_DEFAULT(UseFastStosb, false);
1789   }
1790 
1791   // For AMD Processors use XMM/YMM MOVDQU instructions
1792   // for Object Initialization as default
1793   if (is_amd() && cpu_family() >= 0x19) {
1794     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1795       UseFastStosb = false;
1796     }
1797   }
1798 
1799 #ifdef COMPILER2
1800   if (is_intel() && MaxVectorSize > 16) {
1801     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1802       UseFastStosb = false;
1803     }
1804   }
1805 #endif
1806 
1807   // Use XMM/YMM MOVDQU instruction for Object Initialization
1808   if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1809     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1810       UseXMMForObjInit = true;
1811     }
1812   } else if (UseXMMForObjInit) {
1813     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1814     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1815   }
1816 
1817 #ifdef COMPILER2
1818   if (FLAG_IS_DEFAULT(AlignVector)) {
1819     // Modern processors allow misaligned memory operations for vectors.
1820     AlignVector = !UseUnalignedLoadStores;
1821   }
1822 #endif // COMPILER2
1823 
1824   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1825     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1826       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1827     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1828       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1829     }
1830   }
1831 
1832   // Allocation prefetch settings
1833   int cache_line_size = checked_cast<int>(prefetch_data_size());
1834   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1835       (cache_line_size > AllocatePrefetchStepSize)) {
1836     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1837   }
1838 
1839   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1840     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1841     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1842       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1843     }
1844     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1845   }
1846 
1847   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1848     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1849     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1850   }
1851 
1852   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1853     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1854         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1855       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1856     }
1857 #ifdef COMPILER2
1858     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1859       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1860     }
1861 #endif
1862   }
1863 
1864   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1865 #ifdef COMPILER2
1866     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1867       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1868     }
1869 #endif
1870   }
1871 
1872 #ifdef _LP64
1873   // Prefetch settings
1874 
1875   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1876   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1877   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1878   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1879 
1880   // gc copy/scan is disabled if prefetchw isn't supported, because
1881   // Prefetch::write emits an inlined prefetchw on Linux.
1882   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1883   // The used prefetcht0 instruction works for both amd64 and em64t.
1884 
1885   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1886     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1887   }
1888   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1889     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1890   }
1891 #endif
1892 
1893   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1894      (cache_line_size > ContendedPaddingWidth))
1895      ContendedPaddingWidth = cache_line_size;
1896 
1897   // This machine allows unaligned memory accesses
1898   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1899     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1900   }
1901 
1902 #ifndef PRODUCT
1903   if (log_is_enabled(Info, os, cpu)) {
1904     LogStream ls(Log(os, cpu)::info());
1905     outputStream* log = &ls;
1906     log->print_cr("Logical CPUs per core: %u",
1907                   logical_processors_per_package());
1908     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1909     log->print("UseSSE=%d", UseSSE);
1910     if (UseAVX > 0) {
1911       log->print("  UseAVX=%d", UseAVX);
1912     }
1913     if (UseAES) {
1914       log->print("  UseAES=1");
1915     }
1916 #ifdef COMPILER2
1917     if (MaxVectorSize > 0) {
1918       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1919     }
1920 #endif
1921     log->cr();
1922     log->print("Allocation");
1923     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1924       log->print_cr(": no prefetching");
1925     } else {
1926       log->print(" prefetching: ");
1927       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1928         log->print("PREFETCHW");
1929       } else if (UseSSE >= 1) {
1930         if (AllocatePrefetchInstr == 0) {
1931           log->print("PREFETCHNTA");
1932         } else if (AllocatePrefetchInstr == 1) {
1933           log->print("PREFETCHT0");
1934         } else if (AllocatePrefetchInstr == 2) {
1935           log->print("PREFETCHT2");
1936         } else if (AllocatePrefetchInstr == 3) {
1937           log->print("PREFETCHW");
1938         }
1939       }
1940       if (AllocatePrefetchLines > 1) {
1941         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1942       } else {
1943         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1944       }
1945     }
1946 
1947     if (PrefetchCopyIntervalInBytes > 0) {
1948       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1949     }
1950     if (PrefetchScanIntervalInBytes > 0) {
1951       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1952     }
1953     if (ContendedPaddingWidth > 0) {
1954       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1955     }
1956   }
1957 #endif // !PRODUCT
1958   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1959       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1960   }
1961   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1962       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1963   }
1964 }
1965 
1966 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1967   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1968   if (vrt == XenHVM) {
1969     st->print_cr("Xen hardware-assisted virtualization detected");
1970   } else if (vrt == KVM) {
1971     st->print_cr("KVM virtualization detected");
1972   } else if (vrt == VMWare) {
1973     st->print_cr("VMWare virtualization detected");
1974     VirtualizationSupport::print_virtualization_info(st);
1975   } else if (vrt == HyperV) {
1976     st->print_cr("Hyper-V virtualization detected");
1977   } else if (vrt == HyperVRole) {
1978     st->print_cr("Hyper-V role detected");
1979   }
1980 }
1981 
1982 bool VM_Version::compute_has_intel_jcc_erratum() {
1983   if (!is_intel_family_core()) {
1984     // Only Intel CPUs are affected.
1985     return false;
1986   }
1987   // The following table of affected CPUs is based on the following document released by Intel:
1988   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1989   switch (_model) {
1990   case 0x8E:
1991     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1992     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1993     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1994     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1995     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1996     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1997     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1998     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1999     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2000     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
2001   case 0x4E:
2002     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
2003     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
2004     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
2005     return _stepping == 0x3;
2006   case 0x55:
2007     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2008     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2009     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2010     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2011     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2012     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2013     return _stepping == 0x4 || _stepping == 0x7;
2014   case 0x5E:
2015     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2016     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2017     return _stepping == 0x3;
2018   case 0x9E:
2019     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2020     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2021     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2022     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2023     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2024     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2025     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2026     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2027     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2028     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2029     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2030     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2031     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2032     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2033     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2034   case 0xA5:
2035     // Not in Intel documentation.
2036     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2037     return true;
2038   case 0xA6:
2039     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2040     return _stepping == 0x0;
2041   case 0xAE:
2042     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2043     return _stepping == 0xA;
2044   default:
2045     // If we are running on another intel machine not recognized in the table, we are okay.
2046     return false;
2047   }
2048 }
2049 
2050 // On Xen, the cpuid instruction returns
2051 //  eax / registers[0]: Version of Xen
2052 //  ebx / registers[1]: chars 'XenV'
2053 //  ecx / registers[2]: chars 'MMXe'
2054 //  edx / registers[3]: chars 'nVMM'
2055 //
2056 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2057 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2058 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2059 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2060 //
2061 // more information :
2062 // https://kb.vmware.com/s/article/1009458
2063 //
2064 void VM_Version::check_virtualizations() {
2065   uint32_t registers[4] = {0};
2066   char signature[13] = {0};
2067 
2068   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2069   // from 0x40000000 until 0x40010000.
2070   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2071   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2072     detect_virt_stub(leaf, registers);
2073     memcpy(signature, &registers[1], 12);
2074 
2075     if (strncmp("VMwareVMware", signature, 12) == 0) {
2076       Abstract_VM_Version::_detected_virtualization = VMWare;
2077       // check for extended metrics from guestlib
2078       VirtualizationSupport::initialize();
2079     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2080       Abstract_VM_Version::_detected_virtualization = HyperV;
2081 #ifdef _WINDOWS
2082       // CPUID leaf 0x40000007 is available to the root partition only.
2083       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2084       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2085       detect_virt_stub(0x40000007, registers);
2086       if ((registers[0] != 0x0) ||
2087           (registers[1] != 0x0) ||
2088           (registers[2] != 0x0) ||
2089           (registers[3] != 0x0)) {
2090         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2091       }
2092 #endif
2093     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2094       Abstract_VM_Version::_detected_virtualization = KVM;
2095     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2096       Abstract_VM_Version::_detected_virtualization = XenHVM;
2097     }
2098   }
2099 }
2100 
2101 #ifdef COMPILER2
2102 // Determine if it's running on Cascade Lake using default options.
2103 bool VM_Version::is_default_intel_cascade_lake() {
2104   return FLAG_IS_DEFAULT(UseAVX) &&
2105          FLAG_IS_DEFAULT(MaxVectorSize) &&
2106          UseAVX > 2 &&
2107          is_intel_cascade_lake();
2108 }
2109 #endif
2110 
2111 bool VM_Version::is_intel_cascade_lake() {
2112   return is_intel_skylake() && _stepping >= 5;
2113 }
2114 
2115 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2116 // for implementing the array copy and clear operations.
2117 // The Intel platforms that supports the serialize instruction
2118 // has improved implementation of 64-byte load/stores and so the default
2119 // threshold is set to 0 for these platforms.
2120 int VM_Version::avx3_threshold() {
2121   return (is_intel_family_core() &&
2122           supports_serialize() &&
2123           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2124 }
2125 
2126 static bool _vm_version_initialized = false;
2127 
2128 void VM_Version::initialize() {
2129   ResourceMark rm;
2130   // Making this stub must be FIRST use of assembler
2131   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2132   if (stub_blob == nullptr) {
2133     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2134   }
2135   CodeBuffer c(stub_blob);
2136   VM_Version_StubGenerator g(&c);
2137 
2138   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2139                                      g.generate_get_cpu_info());
2140   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2141                                      g.generate_detect_virt());
2142 
2143   get_processor_features();
2144 
2145   LP64_ONLY(Assembler::precompute_instructions();)
2146 
2147   if (VM_Version::supports_hv()) { // Supports hypervisor
2148     check_virtualizations();
2149   }
2150   _vm_version_initialized = true;
2151 }
2152 
2153 typedef enum {
2154    CPU_FAMILY_8086_8088  = 0,
2155    CPU_FAMILY_INTEL_286  = 2,
2156    CPU_FAMILY_INTEL_386  = 3,
2157    CPU_FAMILY_INTEL_486  = 4,
2158    CPU_FAMILY_PENTIUM    = 5,
2159    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2160    CPU_FAMILY_PENTIUM_4  = 0xF
2161 } FamilyFlag;
2162 
2163 typedef enum {
2164   RDTSCP_FLAG  = 0x08000000, // bit 27
2165   INTEL64_FLAG = 0x20000000  // bit 29
2166 } _featureExtendedEdxFlag;
2167 
2168 typedef enum {
2169    FPU_FLAG     = 0x00000001,
2170    VME_FLAG     = 0x00000002,
2171    DE_FLAG      = 0x00000004,
2172    PSE_FLAG     = 0x00000008,
2173    TSC_FLAG     = 0x00000010,
2174    MSR_FLAG     = 0x00000020,
2175    PAE_FLAG     = 0x00000040,
2176    MCE_FLAG     = 0x00000080,
2177    CX8_FLAG     = 0x00000100,
2178    APIC_FLAG    = 0x00000200,
2179    SEP_FLAG     = 0x00000800,
2180    MTRR_FLAG    = 0x00001000,
2181    PGE_FLAG     = 0x00002000,
2182    MCA_FLAG     = 0x00004000,
2183    CMOV_FLAG    = 0x00008000,
2184    PAT_FLAG     = 0x00010000,
2185    PSE36_FLAG   = 0x00020000,
2186    PSNUM_FLAG   = 0x00040000,
2187    CLFLUSH_FLAG = 0x00080000,
2188    DTS_FLAG     = 0x00200000,
2189    ACPI_FLAG    = 0x00400000,
2190    MMX_FLAG     = 0x00800000,
2191    FXSR_FLAG    = 0x01000000,
2192    SSE_FLAG     = 0x02000000,
2193    SSE2_FLAG    = 0x04000000,
2194    SS_FLAG      = 0x08000000,
2195    HTT_FLAG     = 0x10000000,
2196    TM_FLAG      = 0x20000000
2197 } FeatureEdxFlag;
2198 
2199 static BufferBlob* cpuid_brand_string_stub_blob;
2200 static const int   cpuid_brand_string_stub_size = 550;
2201 
2202 extern "C" {
2203   typedef void (*getCPUIDBrandString_stub_t)(void*);
2204 }
2205 
2206 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2207 
2208 // VM_Version statics
2209 enum {
2210   ExtendedFamilyIdLength_INTEL = 16,
2211   ExtendedFamilyIdLength_AMD   = 24
2212 };
2213 
2214 const size_t VENDOR_LENGTH = 13;
2215 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2216 static char* _cpu_brand_string = nullptr;
2217 static int64_t _max_qualified_cpu_frequency = 0;
2218 
2219 static int _no_of_threads = 0;
2220 static int _no_of_cores = 0;
2221 
2222 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2223   "8086/8088",
2224   "",
2225   "286",
2226   "386",
2227   "486",
2228   "Pentium",
2229   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2230   "",
2231   "",
2232   "",
2233   "",
2234   "",
2235   "",
2236   "",
2237   "",
2238   "Pentium 4"
2239 };
2240 
2241 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2242   "",
2243   "",
2244   "",
2245   "",
2246   "5x86",
2247   "K5/K6",
2248   "Athlon/AthlonXP",
2249   "",
2250   "",
2251   "",
2252   "",
2253   "",
2254   "",
2255   "",
2256   "",
2257   "Opteron/Athlon64",
2258   "Opteron QC/Phenom",  // Barcelona et.al.
2259   "",
2260   "",
2261   "",
2262   "",
2263   "",
2264   "",
2265   "Zen"
2266 };
2267 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2268 // September 2013, Vol 3C Table 35-1
2269 const char* const _model_id_pentium_pro[] = {
2270   "",
2271   "Pentium Pro",
2272   "",
2273   "Pentium II model 3",
2274   "",
2275   "Pentium II model 5/Xeon/Celeron",
2276   "Celeron",
2277   "Pentium III/Pentium III Xeon",
2278   "Pentium III/Pentium III Xeon",
2279   "Pentium M model 9",    // Yonah
2280   "Pentium III, model A",
2281   "Pentium III, model B",
2282   "",
2283   "Pentium M model D",    // Dothan
2284   "",
2285   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2286   "",
2287   "",
2288   "",
2289   "",
2290   "",
2291   "",
2292   "Celeron",              // 0x16 Celeron 65nm
2293   "Core 2",               // 0x17 Penryn / Harpertown
2294   "",
2295   "",
2296   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2297   "Atom",                 // 0x1B Z5xx series Silverthorn
2298   "",
2299   "Core 2",               // 0x1D Dunnington (6-core)
2300   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2301   "",
2302   "",
2303   "",
2304   "",
2305   "",
2306   "",
2307   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2308   "",
2309   "",
2310   "",                     // 0x28
2311   "",
2312   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2313   "",
2314   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2315   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2316   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2317   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2318   "",
2319   "",
2320   "",
2321   "",
2322   "",
2323   "",
2324   "",
2325   "",
2326   "",
2327   "",
2328   "Ivy Bridge",           // 0x3a
2329   "",
2330   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2331   "",                     // 0x3d "Next Generation Intel Core Processor"
2332   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2333   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2334   "",
2335   "",
2336   "",
2337   "",
2338   "",
2339   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2340   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2341   nullptr
2342 };
2343 
2344 /* Brand ID is for back compatibility
2345  * Newer CPUs uses the extended brand string */
2346 const char* const _brand_id[] = {
2347   "",
2348   "Celeron processor",
2349   "Pentium III processor",
2350   "Intel Pentium III Xeon processor",
2351   "",
2352   "",
2353   "",
2354   "",
2355   "Intel Pentium 4 processor",
2356   nullptr
2357 };
2358 
2359 
2360 const char* const _feature_edx_id[] = {
2361   "On-Chip FPU",
2362   "Virtual Mode Extensions",
2363   "Debugging Extensions",
2364   "Page Size Extensions",
2365   "Time Stamp Counter",
2366   "Model Specific Registers",
2367   "Physical Address Extension",
2368   "Machine Check Exceptions",
2369   "CMPXCHG8B Instruction",
2370   "On-Chip APIC",
2371   "",
2372   "Fast System Call",
2373   "Memory Type Range Registers",
2374   "Page Global Enable",
2375   "Machine Check Architecture",
2376   "Conditional Mov Instruction",
2377   "Page Attribute Table",
2378   "36-bit Page Size Extension",
2379   "Processor Serial Number",
2380   "CLFLUSH Instruction",
2381   "",
2382   "Debug Trace Store feature",
2383   "ACPI registers in MSR space",
2384   "Intel Architecture MMX Technology",
2385   "Fast Float Point Save and Restore",
2386   "Streaming SIMD extensions",
2387   "Streaming SIMD extensions 2",
2388   "Self-Snoop",
2389   "Hyper Threading",
2390   "Thermal Monitor",
2391   "",
2392   "Pending Break Enable"
2393 };
2394 
2395 const char* const _feature_extended_edx_id[] = {
2396   "",
2397   "",
2398   "",
2399   "",
2400   "",
2401   "",
2402   "",
2403   "",
2404   "",
2405   "",
2406   "",
2407   "SYSCALL/SYSRET",
2408   "",
2409   "",
2410   "",
2411   "",
2412   "",
2413   "",
2414   "",
2415   "",
2416   "Execute Disable Bit",
2417   "",
2418   "",
2419   "",
2420   "",
2421   "",
2422   "",
2423   "RDTSCP",
2424   "",
2425   "Intel 64 Architecture",
2426   "",
2427   ""
2428 };
2429 
2430 const char* const _feature_ecx_id[] = {
2431   "Streaming SIMD Extensions 3",
2432   "PCLMULQDQ",
2433   "64-bit DS Area",
2434   "MONITOR/MWAIT instructions",
2435   "CPL Qualified Debug Store",
2436   "Virtual Machine Extensions",
2437   "Safer Mode Extensions",
2438   "Enhanced Intel SpeedStep technology",
2439   "Thermal Monitor 2",
2440   "Supplemental Streaming SIMD Extensions 3",
2441   "L1 Context ID",
2442   "",
2443   "Fused Multiply-Add",
2444   "CMPXCHG16B",
2445   "xTPR Update Control",
2446   "Perfmon and Debug Capability",
2447   "",
2448   "Process-context identifiers",
2449   "Direct Cache Access",
2450   "Streaming SIMD extensions 4.1",
2451   "Streaming SIMD extensions 4.2",
2452   "x2APIC",
2453   "MOVBE",
2454   "Popcount instruction",
2455   "TSC-Deadline",
2456   "AESNI",
2457   "XSAVE",
2458   "OSXSAVE",
2459   "AVX",
2460   "F16C",
2461   "RDRAND",
2462   ""
2463 };
2464 
2465 const char* const _feature_extended_ecx_id[] = {
2466   "LAHF/SAHF instruction support",
2467   "Core multi-processor legacy mode",
2468   "",
2469   "",
2470   "",
2471   "Advanced Bit Manipulations: LZCNT",
2472   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2473   "Misaligned SSE mode",
2474   "",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   "",
2484   "",
2485   "",
2486   "",
2487   "",
2488   "",
2489   "",
2490   "",
2491   "",
2492   "",
2493   "",
2494   "",
2495   "",
2496   "",
2497   ""
2498 };
2499 
2500 void VM_Version::initialize_tsc(void) {
2501   ResourceMark rm;
2502 
2503   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2504   if (cpuid_brand_string_stub_blob == nullptr) {
2505     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2506   }
2507   CodeBuffer c(cpuid_brand_string_stub_blob);
2508   VM_Version_StubGenerator g(&c);
2509   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2510                                    g.generate_getCPUIDBrandString());
2511 }
2512 
2513 const char* VM_Version::cpu_model_description(void) {
2514   uint32_t cpu_family = extended_cpu_family();
2515   uint32_t cpu_model = extended_cpu_model();
2516   const char* model = nullptr;
2517 
2518   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2519     for (uint32_t i = 0; i <= cpu_model; i++) {
2520       model = _model_id_pentium_pro[i];
2521       if (model == nullptr) {
2522         break;
2523       }
2524     }
2525   }
2526   return model;
2527 }
2528 
2529 const char* VM_Version::cpu_brand_string(void) {
2530   if (_cpu_brand_string == nullptr) {
2531     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2532     if (nullptr == _cpu_brand_string) {
2533       return nullptr;
2534     }
2535     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2536     if (ret_val != OS_OK) {
2537       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2538       _cpu_brand_string = nullptr;
2539     }
2540   }
2541   return _cpu_brand_string;
2542 }
2543 
2544 const char* VM_Version::cpu_brand(void) {
2545   const char*  brand  = nullptr;
2546 
2547   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2548     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2549     brand = _brand_id[0];
2550     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2551       brand = _brand_id[i];
2552     }
2553   }
2554   return brand;
2555 }
2556 
2557 bool VM_Version::cpu_is_em64t(void) {
2558   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2559 }
2560 
2561 bool VM_Version::is_netburst(void) {
2562   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2563 }
2564 
2565 bool VM_Version::supports_tscinv_ext(void) {
2566   if (!supports_tscinv_bit()) {
2567     return false;
2568   }
2569 
2570   if (is_intel()) {
2571     return true;
2572   }
2573 
2574   if (is_amd()) {
2575     return !is_amd_Barcelona();
2576   }
2577 
2578   if (is_hygon()) {
2579     return true;
2580   }
2581 
2582   return false;
2583 }
2584 
2585 void VM_Version::resolve_cpu_information_details(void) {
2586 
2587   // in future we want to base this information on proper cpu
2588   // and cache topology enumeration such as:
2589   // Intel 64 Architecture Processor Topology Enumeration
2590   // which supports system cpu and cache topology enumeration
2591   // either using 2xAPICIDs or initial APICIDs
2592 
2593   // currently only rough cpu information estimates
2594   // which will not necessarily reflect the exact configuration of the system
2595 
2596   // this is the number of logical hardware threads
2597   // visible to the operating system
2598   _no_of_threads = os::processor_count();
2599 
2600   // find out number of threads per cpu package
2601   int threads_per_package = threads_per_core() * cores_per_cpu();
2602 
2603   // use amount of threads visible to the process in order to guess number of sockets
2604   _no_of_sockets = _no_of_threads / threads_per_package;
2605 
2606   // process might only see a subset of the total number of threads
2607   // from a single processor package. Virtualization/resource management for example.
2608   // If so then just write a hard 1 as num of pkgs.
2609   if (0 == _no_of_sockets) {
2610     _no_of_sockets = 1;
2611   }
2612 
2613   // estimate the number of cores
2614   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2615 }
2616 
2617 
2618 const char* VM_Version::cpu_family_description(void) {
2619   int cpu_family_id = extended_cpu_family();
2620   if (is_amd()) {
2621     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2622       return _family_id_amd[cpu_family_id];
2623     }
2624   }
2625   if (is_intel()) {
2626     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2627       return cpu_model_description();
2628     }
2629     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2630       return _family_id_intel[cpu_family_id];
2631     }
2632   }
2633   if (is_hygon()) {
2634     return "Dhyana";
2635   }
2636   return "Unknown x86";
2637 }
2638 
2639 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2640   assert(buf != nullptr, "buffer is null!");
2641   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2642 
2643   const char* cpu_type = nullptr;
2644   const char* x64 = nullptr;
2645 
2646   if (is_intel()) {
2647     cpu_type = "Intel";
2648     x64 = cpu_is_em64t() ? " Intel64" : "";
2649   } else if (is_amd()) {
2650     cpu_type = "AMD";
2651     x64 = cpu_is_em64t() ? " AMD64" : "";
2652   } else if (is_hygon()) {
2653     cpu_type = "Hygon";
2654     x64 = cpu_is_em64t() ? " AMD64" : "";
2655   } else {
2656     cpu_type = "Unknown x86";
2657     x64 = cpu_is_em64t() ? " x86_64" : "";
2658   }
2659 
2660   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2661     cpu_type,
2662     cpu_family_description(),
2663     supports_ht() ? " (HT)" : "",
2664     supports_sse3() ? " SSE3" : "",
2665     supports_ssse3() ? " SSSE3" : "",
2666     supports_sse4_1() ? " SSE4.1" : "",
2667     supports_sse4_2() ? " SSE4.2" : "",
2668     supports_sse4a() ? " SSE4A" : "",
2669     is_netburst() ? " Netburst" : "",
2670     is_intel_family_core() ? " Core" : "",
2671     x64);
2672 
2673   return OS_OK;
2674 }
2675 
2676 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2677   assert(buf != nullptr, "buffer is null!");
2678   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2679   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2680 
2681   // invoke newly generated asm code to fetch CPU Brand String
2682   getCPUIDBrandString_stub(&_cpuid_info);
2683 
2684   // fetch results into buffer
2685   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2686   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2687   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2688   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2689   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2690   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2691   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2692   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2693   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2694   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2695   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2696   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2697 
2698   return OS_OK;
2699 }
2700 
2701 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2702   guarantee(buf != nullptr, "buffer is null!");
2703   guarantee(buf_len > 0, "buffer len not enough!");
2704 
2705   unsigned int flag = 0;
2706   unsigned int fi = 0;
2707   size_t       written = 0;
2708   const char*  prefix = "";
2709 
2710 #define WRITE_TO_BUF(string)                                                          \
2711   {                                                                                   \
2712     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2713     if (res < 0) {                                                                    \
2714       return buf_len - 1;                                                             \
2715     }                                                                                 \
2716     written += res;                                                                   \
2717     if (prefix[0] == '\0') {                                                          \
2718       prefix = ", ";                                                                  \
2719     }                                                                                 \
2720   }
2721 
2722   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2723     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2724       continue; /* no hyperthreading */
2725     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2726       continue; /* no fast system call */
2727     }
2728     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2729       WRITE_TO_BUF(_feature_edx_id[fi]);
2730     }
2731   }
2732 
2733   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2734     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2735       WRITE_TO_BUF(_feature_ecx_id[fi]);
2736     }
2737   }
2738 
2739   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2740     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2741       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2742     }
2743   }
2744 
2745   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2746     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2747       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2748     }
2749   }
2750 
2751   if (supports_tscinv_bit()) {
2752       WRITE_TO_BUF("Invariant TSC");
2753   }
2754 
2755   return written;
2756 }
2757 
2758 /**
2759  * Write a detailed description of the cpu to a given buffer, including
2760  * feature set.
2761  */
2762 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2763   assert(buf != nullptr, "buffer is null!");
2764   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2765 
2766   static const char* unknown = "<unknown>";
2767   char               vendor_id[VENDOR_LENGTH];
2768   const char*        family = nullptr;
2769   const char*        model = nullptr;
2770   const char*        brand = nullptr;
2771   int                outputLen = 0;
2772 
2773   family = cpu_family_description();
2774   if (family == nullptr) {
2775     family = unknown;
2776   }
2777 
2778   model = cpu_model_description();
2779   if (model == nullptr) {
2780     model = unknown;
2781   }
2782 
2783   brand = cpu_brand_string();
2784 
2785   if (brand == nullptr) {
2786     brand = cpu_brand();
2787     if (brand == nullptr) {
2788       brand = unknown;
2789     }
2790   }
2791 
2792   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2793   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2794   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2795   vendor_id[VENDOR_LENGTH-1] = '\0';
2796 
2797   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2798     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2799     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2800     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2801     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2802     "Supports: ",
2803     brand,
2804     vendor_id,
2805     family,
2806     extended_cpu_family(),
2807     model,
2808     extended_cpu_model(),
2809     cpu_stepping(),
2810     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2811     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2812     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2813     _cpuid_info.std_cpuid1_eax.value,
2814     _cpuid_info.std_cpuid1_ebx.value,
2815     _cpuid_info.std_cpuid1_ecx.value,
2816     _cpuid_info.std_cpuid1_edx.value,
2817     _cpuid_info.ext_cpuid1_eax,
2818     _cpuid_info.ext_cpuid1_ebx,
2819     _cpuid_info.ext_cpuid1_ecx,
2820     _cpuid_info.ext_cpuid1_edx);
2821 
2822   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2823     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2824     return OS_ERR;
2825   }
2826 
2827   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2828 
2829   return OS_OK;
2830 }
2831 
2832 
2833 // Fill in Abstract_VM_Version statics
2834 void VM_Version::initialize_cpu_information() {
2835   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2836   assert(!_initialized, "shouldn't be initialized yet");
2837   resolve_cpu_information_details();
2838 
2839   // initialize cpu_name and cpu_desc
2840   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2841   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2842   _initialized = true;
2843 }
2844 
2845 /**
2846  *  For information about extracting the frequency from the cpu brand string, please see:
2847  *
2848  *    Intel Processor Identification and the CPUID Instruction
2849  *    Application Note 485
2850  *    May 2012
2851  *
2852  * The return value is the frequency in Hz.
2853  */
2854 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2855   const char* const brand_string = cpu_brand_string();
2856   if (brand_string == nullptr) {
2857     return 0;
2858   }
2859   const int64_t MEGA = 1000000;
2860   int64_t multiplier = 0;
2861   int64_t frequency = 0;
2862   uint8_t idx = 0;
2863   // The brand string buffer is at most 48 bytes.
2864   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2865   for (; idx < 48-2; ++idx) {
2866     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2867     // Search brand string for "yHz" where y is M, G, or T.
2868     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2869       if (brand_string[idx] == 'M') {
2870         multiplier = MEGA;
2871       } else if (brand_string[idx] == 'G') {
2872         multiplier = MEGA * 1000;
2873       } else if (brand_string[idx] == 'T') {
2874         multiplier = MEGA * MEGA;
2875       }
2876       break;
2877     }
2878   }
2879   if (multiplier > 0) {
2880     // Compute frequency (in Hz) from brand string.
2881     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2882       frequency =  (brand_string[idx-4] - '0') * multiplier;
2883       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2884       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2885     } else { // format is "xxxx"
2886       frequency =  (brand_string[idx-4] - '0') * 1000;
2887       frequency += (brand_string[idx-3] - '0') * 100;
2888       frequency += (brand_string[idx-2] - '0') * 10;
2889       frequency += (brand_string[idx-1] - '0');
2890       frequency *= multiplier;
2891     }
2892   }
2893   return frequency;
2894 }
2895 
2896 
2897 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2898   if (_max_qualified_cpu_frequency == 0) {
2899     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2900   }
2901   return _max_qualified_cpu_frequency;
2902 }
2903 
2904 uint64_t VM_Version::CpuidInfo::feature_flags() const {
2905   uint64_t result = 0;
2906   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2907     result |= CPU_CX8;
2908   if (std_cpuid1_edx.bits.cmov != 0)
2909     result |= CPU_CMOV;
2910   if (std_cpuid1_edx.bits.clflush != 0)
2911     result |= CPU_FLUSH;
2912 #ifdef _LP64
2913   // clflush should always be available on x86_64
2914   // if not we are in real trouble because we rely on it
2915   // to flush the code cache.
2916   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2917 #endif
2918   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2919       ext_cpuid1_edx.bits.fxsr != 0))
2920     result |= CPU_FXSR;
2921   // HT flag is set for multi-core processors also.
2922   if (threads_per_core() > 1)
2923     result |= CPU_HT;
2924   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2925       ext_cpuid1_edx.bits.mmx != 0))
2926     result |= CPU_MMX;
2927   if (std_cpuid1_edx.bits.sse != 0)
2928     result |= CPU_SSE;
2929   if (std_cpuid1_edx.bits.sse2 != 0)
2930     result |= CPU_SSE2;
2931   if (std_cpuid1_ecx.bits.sse3 != 0)
2932     result |= CPU_SSE3;
2933   if (std_cpuid1_ecx.bits.ssse3 != 0)
2934     result |= CPU_SSSE3;
2935   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2936     result |= CPU_SSE4_1;
2937   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2938     result |= CPU_SSE4_2;
2939   if (std_cpuid1_ecx.bits.popcnt != 0)
2940     result |= CPU_POPCNT;
2941   if (std_cpuid1_ecx.bits.avx != 0 &&
2942       std_cpuid1_ecx.bits.osxsave != 0 &&
2943       xem_xcr0_eax.bits.sse != 0 &&
2944       xem_xcr0_eax.bits.ymm != 0) {
2945     result |= CPU_AVX;
2946     result |= CPU_VZEROUPPER;
2947     if (std_cpuid1_ecx.bits.f16c != 0)
2948       result |= CPU_F16C;
2949     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2950       result |= CPU_AVX2;
2951       if (sef_cpuid7_ecx1_eax.bits.avx_ifma != 0)
2952         result |= CPU_AVX_IFMA;
2953     }
2954     if (sef_cpuid7_ecx.bits.gfni != 0)
2955         result |= CPU_GFNI;
2956     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2957         xem_xcr0_eax.bits.opmask != 0 &&
2958         xem_xcr0_eax.bits.zmm512 != 0 &&
2959         xem_xcr0_eax.bits.zmm32 != 0) {
2960       result |= CPU_AVX512F;
2961       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2962         result |= CPU_AVX512CD;
2963       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2964         result |= CPU_AVX512DQ;
2965       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2966         result |= CPU_AVX512_IFMA;
2967       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2968         result |= CPU_AVX512PF;
2969       if (sef_cpuid7_ebx.bits.avx512er != 0)
2970         result |= CPU_AVX512ER;
2971       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2972         result |= CPU_AVX512BW;
2973       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2974         result |= CPU_AVX512VL;
2975       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2976         result |= CPU_AVX512_VPOPCNTDQ;
2977       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2978         result |= CPU_AVX512_VPCLMULQDQ;
2979       if (sef_cpuid7_ecx.bits.vaes != 0)
2980         result |= CPU_AVX512_VAES;
2981       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2982         result |= CPU_AVX512_VNNI;
2983       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2984         result |= CPU_AVX512_BITALG;
2985       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2986         result |= CPU_AVX512_VBMI;
2987       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2988         result |= CPU_AVX512_VBMI2;
2989     }
2990   }
2991   if (std_cpuid1_ecx.bits.hv != 0)
2992     result |= CPU_HV;
2993   if (sef_cpuid7_ebx.bits.bmi1 != 0)
2994     result |= CPU_BMI1;
2995   if (std_cpuid1_edx.bits.tsc != 0)
2996     result |= CPU_TSC;
2997   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
2998     result |= CPU_TSCINV_BIT;
2999   if (std_cpuid1_ecx.bits.aes != 0)
3000     result |= CPU_AES;
3001   if (sef_cpuid7_ebx.bits.erms != 0)
3002     result |= CPU_ERMS;
3003   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3004     result |= CPU_FSRM;
3005   if (std_cpuid1_ecx.bits.clmul != 0)
3006     result |= CPU_CLMUL;
3007   if (sef_cpuid7_ebx.bits.rtm != 0)
3008     result |= CPU_RTM;
3009   if (sef_cpuid7_ebx.bits.adx != 0)
3010      result |= CPU_ADX;
3011   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3012     result |= CPU_BMI2;
3013   if (sef_cpuid7_ebx.bits.sha != 0)
3014     result |= CPU_SHA;
3015   if (std_cpuid1_ecx.bits.fma != 0)
3016     result |= CPU_FMA;
3017   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3018     result |= CPU_FLUSHOPT;
3019   if (ext_cpuid1_edx.bits.rdtscp != 0)
3020     result |= CPU_RDTSCP;
3021   if (sef_cpuid7_ecx.bits.rdpid != 0)
3022     result |= CPU_RDPID;
3023 
3024   // AMD|Hygon features.
3025   if (is_amd_family()) {
3026     if ((ext_cpuid1_edx.bits.tdnow != 0) ||
3027         (ext_cpuid1_ecx.bits.prefetchw != 0))
3028       result |= CPU_3DNOW_PREFETCH;
3029     if (ext_cpuid1_ecx.bits.lzcnt != 0)
3030       result |= CPU_LZCNT;
3031     if (ext_cpuid1_ecx.bits.sse4a != 0)
3032       result |= CPU_SSE4A;
3033   }
3034 
3035   // Intel features.
3036   if (is_intel()) {
3037     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3038       result |= CPU_LZCNT;
3039     }
3040     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3041       result |= CPU_3DNOW_PREFETCH;
3042     }
3043     if (sef_cpuid7_ebx.bits.clwb != 0) {
3044       result |= CPU_CLWB;
3045     }
3046     if (sef_cpuid7_edx.bits.serialize != 0)
3047       result |= CPU_SERIALIZE;
3048   }
3049 
3050   // ZX features.
3051   if (is_zx()) {
3052     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3053       result |= CPU_LZCNT;
3054     }
3055     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3056       result |= CPU_3DNOW_PREFETCH;
3057     }
3058   }
3059 
3060   // Protection key features.
3061   if (sef_cpuid7_ecx.bits.pku != 0) {
3062     result |= CPU_PKU;
3063   }
3064   if (sef_cpuid7_ecx.bits.ospke != 0) {
3065     result |= CPU_OSPKE;
3066   }
3067 
3068   // Control flow enforcement (CET) features.
3069   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3070     result |= CPU_CET_SS;
3071   }
3072   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3073     result |= CPU_CET_IBT;
3074   }
3075 
3076   // Composite features.
3077   if (supports_tscinv_bit() &&
3078       ((is_amd_family() && !is_amd_Barcelona()) ||
3079        is_intel_tsc_synched_at_init())) {
3080     result |= CPU_TSCINV;
3081   }
3082 
3083   return result;
3084 }
3085 
3086 bool VM_Version::os_supports_avx_vectors() {
3087   bool retVal = false;
3088   int nreg = 2 LP64_ONLY(+2);
3089   if (supports_evex()) {
3090     // Verify that OS save/restore all bits of EVEX registers
3091     // during signal processing.
3092     retVal = true;
3093     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3094       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3095         retVal = false;
3096         break;
3097       }
3098     }
3099   } else if (supports_avx()) {
3100     // Verify that OS save/restore all bits of AVX registers
3101     // during signal processing.
3102     retVal = true;
3103     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3104       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3105         retVal = false;
3106         break;
3107       }
3108     }
3109     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3110     if (retVal == false) {
3111       // Verify that OS save/restore all bits of EVEX registers
3112       // during signal processing.
3113       retVal = true;
3114       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3115         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3116           retVal = false;
3117           break;
3118         }
3119       }
3120     }
3121   }
3122   return retVal;
3123 }
3124 
3125 uint VM_Version::cores_per_cpu() {
3126   uint result = 1;
3127   if (is_intel()) {
3128     bool supports_topology = supports_processor_topology();
3129     if (supports_topology) {
3130       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3131                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3132     }
3133     if (!supports_topology || result == 0) {
3134       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3135     }
3136   } else if (is_amd_family()) {
3137     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3138   } else if (is_zx()) {
3139     bool supports_topology = supports_processor_topology();
3140     if (supports_topology) {
3141       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3142                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3143     }
3144     if (!supports_topology || result == 0) {
3145       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3146     }
3147   }
3148   return result;
3149 }
3150 
3151 uint VM_Version::threads_per_core() {
3152   uint result = 1;
3153   if (is_intel() && supports_processor_topology()) {
3154     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3155   } else if (is_zx() && supports_processor_topology()) {
3156     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3157   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3158     if (cpu_family() >= 0x17) {
3159       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3160     } else {
3161       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3162                  cores_per_cpu();
3163     }
3164   }
3165   return (result == 0 ? 1 : result);
3166 }
3167 
3168 uint VM_Version::L1_line_size() {
3169   uint result = 0;
3170   if (is_intel()) {
3171     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3172   } else if (is_amd_family()) {
3173     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3174   } else if (is_zx()) {
3175     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3176   }
3177   if (result < 32) // not defined ?
3178     result = 32;   // 32 bytes by default on x86 and other x64
3179   return result;
3180 }
3181 
3182 bool VM_Version::is_intel_tsc_synched_at_init() {
3183   if (is_intel_family_core()) {
3184     uint32_t ext_model = extended_cpu_model();
3185     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3186         ext_model == CPU_MODEL_WESTMERE_EP    ||
3187         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3188         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3189       // <= 2-socket invariant tsc support. EX versions are usually used
3190       // in > 2-socket systems and likely don't synchronize tscs at
3191       // initialization.
3192       // Code that uses tsc values must be prepared for them to arbitrarily
3193       // jump forward or backward.
3194       return true;
3195     }
3196   }
3197   return false;
3198 }
3199 
3200 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3201   // Hardware prefetching (distance/size in bytes):
3202   // Pentium 3 -  64 /  32
3203   // Pentium 4 - 256 / 128
3204   // Athlon    -  64 /  32 ????
3205   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3206   // Core      - 128 /  64
3207   //
3208   // Software prefetching (distance in bytes / instruction with best score):
3209   // Pentium 3 - 128 / prefetchnta
3210   // Pentium 4 - 512 / prefetchnta
3211   // Athlon    - 128 / prefetchnta
3212   // Opteron   - 256 / prefetchnta
3213   // Core      - 256 / prefetchnta
3214   // It will be used only when AllocatePrefetchStyle > 0
3215 
3216   if (is_amd_family()) { // AMD | Hygon
3217     if (supports_sse2()) {
3218       return 256; // Opteron
3219     } else {
3220       return 128; // Athlon
3221     }
3222   } else { // Intel
3223     if (supports_sse3() && cpu_family() == 6) {
3224       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3225         return 192;
3226       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3227 #ifdef _LP64
3228         return 384;
3229 #else
3230         return 320;
3231 #endif
3232       }
3233     }
3234     if (supports_sse2()) {
3235       if (cpu_family() == 6) {
3236         return 256; // Pentium M, Core, Core2
3237       } else {
3238         return 512; // Pentium 4
3239       }
3240     } else {
3241       return 128; // Pentium 3 (and all other old CPUs)
3242     }
3243   }
3244 }
3245 
3246 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3247   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3248   switch (id) {
3249   case vmIntrinsics::_floatToFloat16:
3250   case vmIntrinsics::_float16ToFloat:
3251     if (!supports_float16()) {
3252       return false;
3253     }
3254     break;
3255   default:
3256     break;
3257   }
3258   return true;
3259 }