1 /*
   2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "jvm.h"
  32 #include "logging/log.hpp"
  33 #include "logging/logStream.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "runtime/globals_extension.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/checkedCast.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_FLAG
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = 0;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = 0;
  59 
  60 static BufferBlob* stub_blob;
  61 static const int stub_size = 2000;
  62 
  63 extern "C" {
  64   typedef void (*get_cpu_info_stub_t)(void*);
  65   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  66 }
  67 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  68 static detect_virt_stub_t detect_virt_stub = nullptr;
  69 
  70 #ifdef _LP64
  71 
  72 bool VM_Version::supports_clflush() {
  73   // clflush should always be available on x86_64
  74   // if not we are in real trouble because we rely on it
  75   // to flush the code cache.
  76   // Unfortunately, Assembler::clflush is currently called as part
  77   // of generation of the code cache flush routine. This happens
  78   // under Universe::init before the processor features are set
  79   // up. Assembler::flush calls this routine to check that clflush
  80   // is allowed. So, we give the caller a free pass if Universe init
  81   // is still in progress.
  82   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  83   return true;
  84 }
  85 #endif
  86 
  87 #define CPUID_STANDARD_FN   0x0
  88 #define CPUID_STANDARD_FN_1 0x1
  89 #define CPUID_STANDARD_FN_4 0x4
  90 #define CPUID_STANDARD_FN_B 0xb
  91 
  92 #define CPUID_EXTENDED_FN   0x80000000
  93 #define CPUID_EXTENDED_FN_1 0x80000001
  94 #define CPUID_EXTENDED_FN_2 0x80000002
  95 #define CPUID_EXTENDED_FN_3 0x80000003
  96 #define CPUID_EXTENDED_FN_4 0x80000004
  97 #define CPUID_EXTENDED_FN_7 0x80000007
  98 #define CPUID_EXTENDED_FN_8 0x80000008
  99 
 100 class VM_Version_StubGenerator: public StubCodeGenerator {
 101  public:
 102 
 103   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 104 
 105   address generate_get_cpu_info() {
 106     // Flags to test CPU type.
 107     const uint32_t HS_EFL_AC = 0x40000;
 108     const uint32_t HS_EFL_ID = 0x200000;
 109     // Values for when we don't have a CPUID instruction.
 110     const int      CPU_FAMILY_SHIFT = 8;
 111     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 112     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 113     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 114 
 115     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 116     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 117     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 118 
 119     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 120 #   define __ _masm->
 121 
 122     address start = __ pc();
 123 
 124     //
 125     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 126     //
 127     // LP64: rcx and rdx are first and second argument registers on windows
 128 
 129     __ push(rbp);
 130 #ifdef _LP64
 131     __ mov(rbp, c_rarg0); // cpuid_info address
 132 #else
 133     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 134 #endif
 135     __ push(rbx);
 136     __ push(rsi);
 137     __ pushf();          // preserve rbx, and flags
 138     __ pop(rax);
 139     __ push(rax);
 140     __ mov(rcx, rax);
 141     //
 142     // if we are unable to change the AC flag, we have a 386
 143     //
 144     __ xorl(rax, HS_EFL_AC);
 145     __ push(rax);
 146     __ popf();
 147     __ pushf();
 148     __ pop(rax);
 149     __ cmpptr(rax, rcx);
 150     __ jccb(Assembler::notEqual, detect_486);
 151 
 152     __ movl(rax, CPU_FAMILY_386);
 153     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 154     __ jmp(done);
 155 
 156     //
 157     // If we are unable to change the ID flag, we have a 486 which does
 158     // not support the "cpuid" instruction.
 159     //
 160     __ bind(detect_486);
 161     __ mov(rax, rcx);
 162     __ xorl(rax, HS_EFL_ID);
 163     __ push(rax);
 164     __ popf();
 165     __ pushf();
 166     __ pop(rax);
 167     __ cmpptr(rcx, rax);
 168     __ jccb(Assembler::notEqual, detect_586);
 169 
 170     __ bind(cpu486);
 171     __ movl(rax, CPU_FAMILY_486);
 172     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 173     __ jmp(done);
 174 
 175     //
 176     // At this point, we have a chip which supports the "cpuid" instruction
 177     //
 178     __ bind(detect_586);
 179     __ xorl(rax, rax);
 180     __ cpuid();
 181     __ orl(rax, rax);
 182     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 183                                         // value of at least 1, we give up and
 184                                         // assume a 486
 185     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 186     __ movl(Address(rsi, 0), rax);
 187     __ movl(Address(rsi, 4), rbx);
 188     __ movl(Address(rsi, 8), rcx);
 189     __ movl(Address(rsi,12), rdx);
 190 
 191     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 192     __ jccb(Assembler::belowEqual, std_cpuid4);
 193 
 194     //
 195     // cpuid(0xB) Processor Topology
 196     //
 197     __ movl(rax, 0xb);
 198     __ xorl(rcx, rcx);   // Threads level
 199     __ cpuid();
 200 
 201     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 202     __ movl(Address(rsi, 0), rax);
 203     __ movl(Address(rsi, 4), rbx);
 204     __ movl(Address(rsi, 8), rcx);
 205     __ movl(Address(rsi,12), rdx);
 206 
 207     __ movl(rax, 0xb);
 208     __ movl(rcx, 1);     // Cores level
 209     __ cpuid();
 210     __ push(rax);
 211     __ andl(rax, 0x1f);  // Determine if valid topology level
 212     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 213     __ andl(rax, 0xffff);
 214     __ pop(rax);
 215     __ jccb(Assembler::equal, std_cpuid4);
 216 
 217     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 218     __ movl(Address(rsi, 0), rax);
 219     __ movl(Address(rsi, 4), rbx);
 220     __ movl(Address(rsi, 8), rcx);
 221     __ movl(Address(rsi,12), rdx);
 222 
 223     __ movl(rax, 0xb);
 224     __ movl(rcx, 2);     // Packages level
 225     __ cpuid();
 226     __ push(rax);
 227     __ andl(rax, 0x1f);  // Determine if valid topology level
 228     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 229     __ andl(rax, 0xffff);
 230     __ pop(rax);
 231     __ jccb(Assembler::equal, std_cpuid4);
 232 
 233     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 234     __ movl(Address(rsi, 0), rax);
 235     __ movl(Address(rsi, 4), rbx);
 236     __ movl(Address(rsi, 8), rcx);
 237     __ movl(Address(rsi,12), rdx);
 238 
 239     //
 240     // cpuid(0x4) Deterministic cache params
 241     //
 242     __ bind(std_cpuid4);
 243     __ movl(rax, 4);
 244     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 245     __ jccb(Assembler::greater, std_cpuid1);
 246 
 247     __ xorl(rcx, rcx);   // L1 cache
 248     __ cpuid();
 249     __ push(rax);
 250     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 251     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 252     __ pop(rax);
 253     __ jccb(Assembler::equal, std_cpuid1);
 254 
 255     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 256     __ movl(Address(rsi, 0), rax);
 257     __ movl(Address(rsi, 4), rbx);
 258     __ movl(Address(rsi, 8), rcx);
 259     __ movl(Address(rsi,12), rdx);
 260 
 261     //
 262     // Standard cpuid(0x1)
 263     //
 264     __ bind(std_cpuid1);
 265     __ movl(rax, 1);
 266     __ cpuid();
 267     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 268     __ movl(Address(rsi, 0), rax);
 269     __ movl(Address(rsi, 4), rbx);
 270     __ movl(Address(rsi, 8), rcx);
 271     __ movl(Address(rsi,12), rdx);
 272 
 273     //
 274     // Check if OS has enabled XGETBV instruction to access XCR0
 275     // (OSXSAVE feature flag) and CPU supports AVX
 276     //
 277     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 278     __ cmpl(rcx, 0x18000000);
 279     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 280 
 281     //
 282     // XCR0, XFEATURE_ENABLED_MASK register
 283     //
 284     __ xorl(rcx, rcx);   // zero for XCR0 register
 285     __ xgetbv();
 286     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 287     __ movl(Address(rsi, 0), rax);
 288     __ movl(Address(rsi, 4), rdx);
 289 
 290     //
 291     // cpuid(0x7) Structured Extended Features
 292     //
 293     __ bind(sef_cpuid);
 294     __ movl(rax, 7);
 295     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 296     __ jccb(Assembler::greater, ext_cpuid);
 297 
 298     __ xorl(rcx, rcx);
 299     __ cpuid();
 300     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 301     __ movl(Address(rsi, 0), rax);
 302     __ movl(Address(rsi, 4), rbx);
 303     __ movl(Address(rsi, 8), rcx);
 304     __ movl(Address(rsi, 12), rdx);
 305 
 306     //
 307     // Extended cpuid(0x80000000)
 308     //
 309     __ bind(ext_cpuid);
 310     __ movl(rax, 0x80000000);
 311     __ cpuid();
 312     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 313     __ jcc(Assembler::belowEqual, done);
 314     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 315     __ jcc(Assembler::belowEqual, ext_cpuid1);
 316     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 317     __ jccb(Assembler::belowEqual, ext_cpuid5);
 318     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 319     __ jccb(Assembler::belowEqual, ext_cpuid7);
 320     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 321     __ jccb(Assembler::belowEqual, ext_cpuid8);
 322     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 323     __ jccb(Assembler::below, ext_cpuid8);
 324     //
 325     // Extended cpuid(0x8000001E)
 326     //
 327     __ movl(rax, 0x8000001E);
 328     __ cpuid();
 329     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 330     __ movl(Address(rsi, 0), rax);
 331     __ movl(Address(rsi, 4), rbx);
 332     __ movl(Address(rsi, 8), rcx);
 333     __ movl(Address(rsi,12), rdx);
 334 
 335     //
 336     // Extended cpuid(0x80000008)
 337     //
 338     __ bind(ext_cpuid8);
 339     __ movl(rax, 0x80000008);
 340     __ cpuid();
 341     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 342     __ movl(Address(rsi, 0), rax);
 343     __ movl(Address(rsi, 4), rbx);
 344     __ movl(Address(rsi, 8), rcx);
 345     __ movl(Address(rsi,12), rdx);
 346 
 347     //
 348     // Extended cpuid(0x80000007)
 349     //
 350     __ bind(ext_cpuid7);
 351     __ movl(rax, 0x80000007);
 352     __ cpuid();
 353     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 354     __ movl(Address(rsi, 0), rax);
 355     __ movl(Address(rsi, 4), rbx);
 356     __ movl(Address(rsi, 8), rcx);
 357     __ movl(Address(rsi,12), rdx);
 358 
 359     //
 360     // Extended cpuid(0x80000005)
 361     //
 362     __ bind(ext_cpuid5);
 363     __ movl(rax, 0x80000005);
 364     __ cpuid();
 365     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 366     __ movl(Address(rsi, 0), rax);
 367     __ movl(Address(rsi, 4), rbx);
 368     __ movl(Address(rsi, 8), rcx);
 369     __ movl(Address(rsi,12), rdx);
 370 
 371     //
 372     // Extended cpuid(0x80000001)
 373     //
 374     __ bind(ext_cpuid1);
 375     __ movl(rax, 0x80000001);
 376     __ cpuid();
 377     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 378     __ movl(Address(rsi, 0), rax);
 379     __ movl(Address(rsi, 4), rbx);
 380     __ movl(Address(rsi, 8), rcx);
 381     __ movl(Address(rsi,12), rdx);
 382 
 383     //
 384     // Check if OS has enabled XGETBV instruction to access XCR0
 385     // (OSXSAVE feature flag) and CPU supports AVX
 386     //
 387     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 388     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 389     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 390     __ cmpl(rcx, 0x18000000);
 391     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 392 
 393     __ movl(rax, 0x6);
 394     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 395     __ cmpl(rax, 0x6);
 396     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 397 
 398     // we need to bridge farther than imm8, so we use this island as a thunk
 399     __ bind(done);
 400     __ jmp(wrapup);
 401 
 402     __ bind(start_simd_check);
 403     //
 404     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 405     // registers are not restored after a signal processing.
 406     // Generate SEGV here (reference through null)
 407     // and check upper YMM/ZMM bits after it.
 408     //
 409     int saved_useavx = UseAVX;
 410     int saved_usesse = UseSSE;
 411 
 412     // If UseAVX is uninitialized or is set by the user to include EVEX
 413     if (use_evex) {
 414       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 415       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 416       __ movl(rax, 0x10000);
 417       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 418       __ cmpl(rax, 0x10000);
 419       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 420       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 421       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 422       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 423       __ movl(rax, 0xE0);
 424       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 425       __ cmpl(rax, 0xE0);
 426       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 427 
 428       if (FLAG_IS_DEFAULT(UseAVX)) {
 429         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 430         __ movl(rax, Address(rsi, 0));
 431         __ cmpl(rax, 0x50654);              // If it is Skylake
 432         __ jcc(Assembler::equal, legacy_setup);
 433       }
 434       // EVEX setup: run in lowest evex mode
 435       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 436       UseAVX = 3;
 437       UseSSE = 2;
 438 #ifdef _WINDOWS
 439       // xmm5-xmm15 are not preserved by caller on windows
 440       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 441       __ subptr(rsp, 64);
 442       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 443 #ifdef _LP64
 444       __ subptr(rsp, 64);
 445       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 446       __ subptr(rsp, 64);
 447       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 448 #endif // _LP64
 449 #endif // _WINDOWS
 450 
 451       // load value into all 64 bytes of zmm7 register
 452       __ movl(rcx, VM_Version::ymm_test_value());
 453       __ movdl(xmm0, rcx);
 454       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 455       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 456 #ifdef _LP64
 457       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 458       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 459 #endif
 460       VM_Version::clean_cpuFeatures();
 461       __ jmp(save_restore_except);
 462     }
 463 
 464     __ bind(legacy_setup);
 465     // AVX setup
 466     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 467     UseAVX = 1;
 468     UseSSE = 2;
 469 #ifdef _WINDOWS
 470     __ subptr(rsp, 32);
 471     __ vmovdqu(Address(rsp, 0), xmm7);
 472 #ifdef _LP64
 473     __ subptr(rsp, 32);
 474     __ vmovdqu(Address(rsp, 0), xmm8);
 475     __ subptr(rsp, 32);
 476     __ vmovdqu(Address(rsp, 0), xmm15);
 477 #endif // _LP64
 478 #endif // _WINDOWS
 479 
 480     // load value into all 32 bytes of ymm7 register
 481     __ movl(rcx, VM_Version::ymm_test_value());
 482 
 483     __ movdl(xmm0, rcx);
 484     __ pshufd(xmm0, xmm0, 0x00);
 485     __ vinsertf128_high(xmm0, xmm0);
 486     __ vmovdqu(xmm7, xmm0);
 487 #ifdef _LP64
 488     __ vmovdqu(xmm8, xmm0);
 489     __ vmovdqu(xmm15, xmm0);
 490 #endif
 491     VM_Version::clean_cpuFeatures();
 492 
 493     __ bind(save_restore_except);
 494     __ xorl(rsi, rsi);
 495     VM_Version::set_cpuinfo_segv_addr(__ pc());
 496     // Generate SEGV
 497     __ movl(rax, Address(rsi, 0));
 498 
 499     VM_Version::set_cpuinfo_cont_addr(__ pc());
 500     // Returns here after signal. Save xmm0 to check it later.
 501 
 502     // If UseAVX is uninitialized or is set by the user to include EVEX
 503     if (use_evex) {
 504       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 505       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 506       __ movl(rax, 0x10000);
 507       __ andl(rax, Address(rsi, 4));
 508       __ cmpl(rax, 0x10000);
 509       __ jcc(Assembler::notEqual, legacy_save_restore);
 510       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 511       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 512       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 513       __ movl(rax, 0xE0);
 514       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 515       __ cmpl(rax, 0xE0);
 516       __ jcc(Assembler::notEqual, legacy_save_restore);
 517 
 518       if (FLAG_IS_DEFAULT(UseAVX)) {
 519         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 520         __ movl(rax, Address(rsi, 0));
 521         __ cmpl(rax, 0x50654);              // If it is Skylake
 522         __ jcc(Assembler::equal, legacy_save_restore);
 523       }
 524       // EVEX check: run in lowest evex mode
 525       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 526       UseAVX = 3;
 527       UseSSE = 2;
 528       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 529       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 530       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 531 #ifdef _LP64
 532       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 533       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 534 #endif
 535 
 536 #ifdef _WINDOWS
 537 #ifdef _LP64
 538       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 539       __ addptr(rsp, 64);
 540       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 541       __ addptr(rsp, 64);
 542 #endif // _LP64
 543       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 544       __ addptr(rsp, 64);
 545 #endif // _WINDOWS
 546       generate_vzeroupper(wrapup);
 547       VM_Version::clean_cpuFeatures();
 548       UseAVX = saved_useavx;
 549       UseSSE = saved_usesse;
 550       __ jmp(wrapup);
 551    }
 552 
 553     __ bind(legacy_save_restore);
 554     // AVX check
 555     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 556     UseAVX = 1;
 557     UseSSE = 2;
 558     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 559     __ vmovdqu(Address(rsi, 0), xmm0);
 560     __ vmovdqu(Address(rsi, 32), xmm7);
 561 #ifdef _LP64
 562     __ vmovdqu(Address(rsi, 64), xmm8);
 563     __ vmovdqu(Address(rsi, 96), xmm15);
 564 #endif
 565 
 566 #ifdef _WINDOWS
 567 #ifdef _LP64
 568     __ vmovdqu(xmm15, Address(rsp, 0));
 569     __ addptr(rsp, 32);
 570     __ vmovdqu(xmm8, Address(rsp, 0));
 571     __ addptr(rsp, 32);
 572 #endif // _LP64
 573     __ vmovdqu(xmm7, Address(rsp, 0));
 574     __ addptr(rsp, 32);
 575 #endif // _WINDOWS
 576     generate_vzeroupper(wrapup);
 577     VM_Version::clean_cpuFeatures();
 578     UseAVX = saved_useavx;
 579     UseSSE = saved_usesse;
 580 
 581     __ bind(wrapup);
 582     __ popf();
 583     __ pop(rsi);
 584     __ pop(rbx);
 585     __ pop(rbp);
 586     __ ret(0);
 587 
 588 #   undef __
 589 
 590     return start;
 591   };
 592   void generate_vzeroupper(Label& L_wrapup) {
 593 #   define __ _masm->
 594     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 595     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 596     __ jcc(Assembler::notEqual, L_wrapup);
 597     __ movl(rcx, 0x0FFF0FF0);
 598     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 599     __ andl(rcx, Address(rsi, 0));
 600     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 601     __ jcc(Assembler::equal, L_wrapup);
 602     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 603     __ jcc(Assembler::equal, L_wrapup);
 604     // vzeroupper() will use a pre-computed instruction sequence that we
 605     // can't compute until after we've determined CPU capabilities. Use
 606     // uncached variant here directly to be able to bootstrap correctly
 607     __ vzeroupper_uncached();
 608 #   undef __
 609   }
 610   address generate_detect_virt() {
 611     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 612 #   define __ _masm->
 613 
 614     address start = __ pc();
 615 
 616     // Evacuate callee-saved registers
 617     __ push(rbp);
 618     __ push(rbx);
 619     __ push(rsi); // for Windows
 620 
 621 #ifdef _LP64
 622     __ mov(rax, c_rarg0); // CPUID leaf
 623     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 624 #else
 625     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 626     __ movptr(rsi, Address(rsp, 20)); // register array address
 627 #endif
 628 
 629     __ cpuid();
 630 
 631     // Store result to register array
 632     __ movl(Address(rsi,  0), rax);
 633     __ movl(Address(rsi,  4), rbx);
 634     __ movl(Address(rsi,  8), rcx);
 635     __ movl(Address(rsi, 12), rdx);
 636 
 637     // Epilogue
 638     __ pop(rsi);
 639     __ pop(rbx);
 640     __ pop(rbp);
 641     __ ret(0);
 642 
 643 #   undef __
 644 
 645     return start;
 646   };
 647 
 648 
 649   address generate_getCPUIDBrandString(void) {
 650     // Flags to test CPU type.
 651     const uint32_t HS_EFL_AC           = 0x40000;
 652     const uint32_t HS_EFL_ID           = 0x200000;
 653     // Values for when we don't have a CPUID instruction.
 654     const int      CPU_FAMILY_SHIFT = 8;
 655     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 656     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 657 
 658     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 659 
 660     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 661 #   define __ _masm->
 662 
 663     address start = __ pc();
 664 
 665     //
 666     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 667     //
 668     // LP64: rcx and rdx are first and second argument registers on windows
 669 
 670     __ push(rbp);
 671 #ifdef _LP64
 672     __ mov(rbp, c_rarg0); // cpuid_info address
 673 #else
 674     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 675 #endif
 676     __ push(rbx);
 677     __ push(rsi);
 678     __ pushf();          // preserve rbx, and flags
 679     __ pop(rax);
 680     __ push(rax);
 681     __ mov(rcx, rax);
 682     //
 683     // if we are unable to change the AC flag, we have a 386
 684     //
 685     __ xorl(rax, HS_EFL_AC);
 686     __ push(rax);
 687     __ popf();
 688     __ pushf();
 689     __ pop(rax);
 690     __ cmpptr(rax, rcx);
 691     __ jccb(Assembler::notEqual, detect_486);
 692 
 693     __ movl(rax, CPU_FAMILY_386);
 694     __ jmp(done);
 695 
 696     //
 697     // If we are unable to change the ID flag, we have a 486 which does
 698     // not support the "cpuid" instruction.
 699     //
 700     __ bind(detect_486);
 701     __ mov(rax, rcx);
 702     __ xorl(rax, HS_EFL_ID);
 703     __ push(rax);
 704     __ popf();
 705     __ pushf();
 706     __ pop(rax);
 707     __ cmpptr(rcx, rax);
 708     __ jccb(Assembler::notEqual, detect_586);
 709 
 710     __ bind(cpu486);
 711     __ movl(rax, CPU_FAMILY_486);
 712     __ jmp(done);
 713 
 714     //
 715     // At this point, we have a chip which supports the "cpuid" instruction
 716     //
 717     __ bind(detect_586);
 718     __ xorl(rax, rax);
 719     __ cpuid();
 720     __ orl(rax, rax);
 721     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 722                                         // value of at least 1, we give up and
 723                                         // assume a 486
 724 
 725     //
 726     // Extended cpuid(0x80000000) for processor brand string detection
 727     //
 728     __ bind(ext_cpuid);
 729     __ movl(rax, CPUID_EXTENDED_FN);
 730     __ cpuid();
 731     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 732     __ jcc(Assembler::below, done);
 733 
 734     //
 735     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 736     //
 737     __ movl(rax, CPUID_EXTENDED_FN_2);
 738     __ cpuid();
 739     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 740     __ movl(Address(rsi, 0), rax);
 741     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 742     __ movl(Address(rsi, 0), rbx);
 743     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 744     __ movl(Address(rsi, 0), rcx);
 745     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 746     __ movl(Address(rsi,0), rdx);
 747 
 748     //
 749     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 750     //
 751     __ movl(rax, CPUID_EXTENDED_FN_3);
 752     __ cpuid();
 753     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 754     __ movl(Address(rsi, 0), rax);
 755     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 756     __ movl(Address(rsi, 0), rbx);
 757     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 758     __ movl(Address(rsi, 0), rcx);
 759     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 760     __ movl(Address(rsi,0), rdx);
 761 
 762     //
 763     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 764     //
 765     __ movl(rax, CPUID_EXTENDED_FN_4);
 766     __ cpuid();
 767     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 768     __ movl(Address(rsi, 0), rax);
 769     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 770     __ movl(Address(rsi, 0), rbx);
 771     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 772     __ movl(Address(rsi, 0), rcx);
 773     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 774     __ movl(Address(rsi,0), rdx);
 775 
 776     //
 777     // return
 778     //
 779     __ bind(done);
 780     __ popf();
 781     __ pop(rsi);
 782     __ pop(rbx);
 783     __ pop(rbp);
 784     __ ret(0);
 785 
 786 #   undef __
 787 
 788     return start;
 789   };
 790 };
 791 
 792 void VM_Version::get_processor_features() {
 793 
 794   _cpu = 4; // 486 by default
 795   _model = 0;
 796   _stepping = 0;
 797   _features = 0;
 798   _logical_processors_per_package = 1;
 799   // i486 internal cache is both I&D and has a 16-byte line size
 800   _L1_data_cache_line_size = 16;
 801 
 802   // Get raw processor info
 803 
 804   get_cpu_info_stub(&_cpuid_info);
 805 
 806   assert_is_initialized();
 807   _cpu = extended_cpu_family();
 808   _model = extended_cpu_model();
 809   _stepping = cpu_stepping();
 810 
 811   if (cpu_family() > 4) { // it supports CPUID
 812     _features = feature_flags();
 813     // Logical processors are only available on P4s and above,
 814     // and only if hyperthreading is available.
 815     _logical_processors_per_package = logical_processor_count();
 816     _L1_data_cache_line_size = L1_line_size();
 817   }
 818 
 819   _supports_cx8 = supports_cmpxchg8();
 820   // xchg and xadd instructions
 821   _supports_atomic_getset4 = true;
 822   _supports_atomic_getadd4 = true;
 823   LP64_ONLY(_supports_atomic_getset8 = true);
 824   LP64_ONLY(_supports_atomic_getadd8 = true);
 825 
 826 #ifdef _LP64
 827   // OS should support SSE for x64 and hardware should support at least SSE2.
 828   if (!VM_Version::supports_sse2()) {
 829     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 830   }
 831   // in 64 bit the use of SSE2 is the minimum
 832   if (UseSSE < 2) UseSSE = 2;
 833 #endif
 834 
 835 #ifdef AMD64
 836   // flush_icache_stub have to be generated first.
 837   // That is why Icache line size is hard coded in ICache class,
 838   // see icache_x86.hpp. It is also the reason why we can't use
 839   // clflush instruction in 32-bit VM since it could be running
 840   // on CPU which does not support it.
 841   //
 842   // The only thing we can do is to verify that flushed
 843   // ICache::line_size has correct value.
 844   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 845   // clflush_size is size in quadwords (8 bytes).
 846   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 847 #endif
 848 
 849 #ifdef _LP64
 850   // assigning this field effectively enables Unsafe.writebackMemory()
 851   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 852   // that is only implemented on x86_64 and only if the OS plays ball
 853   if (os::supports_map_sync()) {
 854     // publish data cache line flush size to generic field, otherwise
 855     // let if default to zero thereby disabling writeback
 856     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 857   }
 858 #endif
 859 
 860   if (UseSSE < 4) {
 861     _features &= ~CPU_SSE4_1;
 862     _features &= ~CPU_SSE4_2;
 863   }
 864 
 865   if (UseSSE < 3) {
 866     _features &= ~CPU_SSE3;
 867     _features &= ~CPU_SSSE3;
 868     _features &= ~CPU_SSE4A;
 869   }
 870 
 871   if (UseSSE < 2)
 872     _features &= ~CPU_SSE2;
 873 
 874   if (UseSSE < 1)
 875     _features &= ~CPU_SSE;
 876 
 877   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 878   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 879     UseAVX = 0;
 880   }
 881 
 882   // UseSSE is set to the smaller of what hardware supports and what
 883   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 884   // older Pentiums which do not support it.
 885   int use_sse_limit = 0;
 886   if (UseSSE > 0) {
 887     if (UseSSE > 3 && supports_sse4_1()) {
 888       use_sse_limit = 4;
 889     } else if (UseSSE > 2 && supports_sse3()) {
 890       use_sse_limit = 3;
 891     } else if (UseSSE > 1 && supports_sse2()) {
 892       use_sse_limit = 2;
 893     } else if (UseSSE > 0 && supports_sse()) {
 894       use_sse_limit = 1;
 895     } else {
 896       use_sse_limit = 0;
 897     }
 898   }
 899   if (FLAG_IS_DEFAULT(UseSSE)) {
 900     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 901   } else if (UseSSE > use_sse_limit) {
 902     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 903     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 904   }
 905 
 906   // first try initial setting and detect what we can support
 907   int use_avx_limit = 0;
 908   if (UseAVX > 0) {
 909     if (UseSSE < 4) {
 910       // Don't use AVX if SSE is unavailable or has been disabled.
 911       use_avx_limit = 0;
 912     } else if (UseAVX > 2 && supports_evex()) {
 913       use_avx_limit = 3;
 914     } else if (UseAVX > 1 && supports_avx2()) {
 915       use_avx_limit = 2;
 916     } else if (UseAVX > 0 && supports_avx()) {
 917       use_avx_limit = 1;
 918     } else {
 919       use_avx_limit = 0;
 920     }
 921   }
 922   if (FLAG_IS_DEFAULT(UseAVX)) {
 923     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 924     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 925       FLAG_SET_DEFAULT(UseAVX, 2);
 926     } else {
 927       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 928     }
 929   }
 930   if (UseAVX > use_avx_limit) {
 931     if (UseSSE < 4) {
 932       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 933     } else {
 934       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 935     }
 936     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 937   }
 938 
 939   if (UseAVX < 3) {
 940     _features &= ~CPU_AVX512F;
 941     _features &= ~CPU_AVX512DQ;
 942     _features &= ~CPU_AVX512CD;
 943     _features &= ~CPU_AVX512BW;
 944     _features &= ~CPU_AVX512VL;
 945     _features &= ~CPU_AVX512_VPOPCNTDQ;
 946     _features &= ~CPU_AVX512_VPCLMULQDQ;
 947     _features &= ~CPU_AVX512_VAES;
 948     _features &= ~CPU_AVX512_VNNI;
 949     _features &= ~CPU_AVX512_VBMI;
 950     _features &= ~CPU_AVX512_VBMI2;
 951     _features &= ~CPU_AVX512_BITALG;
 952     _features &= ~CPU_AVX512_IFMA;
 953   }
 954 
 955   if (UseAVX < 2)
 956     _features &= ~CPU_AVX2;
 957 
 958   if (UseAVX < 1) {
 959     _features &= ~CPU_AVX;
 960     _features &= ~CPU_VZEROUPPER;
 961     _features &= ~CPU_F16C;
 962   }
 963 
 964   if (logical_processors_per_package() == 1) {
 965     // HT processor could be installed on a system which doesn't support HT.
 966     _features &= ~CPU_HT;
 967   }
 968 
 969   if (is_intel()) { // Intel cpus specific settings
 970     if (is_knights_family()) {
 971       _features &= ~CPU_VZEROUPPER;
 972       _features &= ~CPU_AVX512BW;
 973       _features &= ~CPU_AVX512VL;
 974       _features &= ~CPU_AVX512DQ;
 975       _features &= ~CPU_AVX512_VNNI;
 976       _features &= ~CPU_AVX512_VAES;
 977       _features &= ~CPU_AVX512_VPOPCNTDQ;
 978       _features &= ~CPU_AVX512_VPCLMULQDQ;
 979       _features &= ~CPU_AVX512_VBMI;
 980       _features &= ~CPU_AVX512_VBMI2;
 981       _features &= ~CPU_CLWB;
 982       _features &= ~CPU_FLUSHOPT;
 983       _features &= ~CPU_GFNI;
 984       _features &= ~CPU_AVX512_BITALG;
 985       _features &= ~CPU_AVX512_IFMA;
 986     }
 987   }
 988 
 989   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
 990     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
 991   } else {
 992     _has_intel_jcc_erratum = IntelJccErratumMitigation;
 993   }
 994 
 995   char buf[1024];
 996   int res = jio_snprintf(
 997               buf, sizeof(buf),
 998               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
 999               cores_per_cpu(), threads_per_core(),
1000               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1001   assert(res > 0, "not enough temporary space allocated");
1002   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1003 
1004   _features_string = os::strdup(buf);
1005 
1006   // Use AES instructions if available.
1007   if (supports_aes()) {
1008     if (FLAG_IS_DEFAULT(UseAES)) {
1009       FLAG_SET_DEFAULT(UseAES, true);
1010     }
1011     if (!UseAES) {
1012       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1013         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1014       }
1015       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1016     } else {
1017       if (UseSSE > 2) {
1018         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1019           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1020         }
1021       } else {
1022         // The AES intrinsic stubs require AES instruction support (of course)
1023         // but also require sse3 mode or higher for instructions it use.
1024         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1025           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1026         }
1027         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1028       }
1029 
1030       // --AES-CTR begins--
1031       if (!UseAESIntrinsics) {
1032         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1033           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1034           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1035         }
1036       } else {
1037         if (supports_sse4_1()) {
1038           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1039             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1040           }
1041         } else {
1042            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1043            // but also require sse4.1 mode or higher for instructions it use.
1044           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1045              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1046            }
1047            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1048         }
1049       }
1050       // --AES-CTR ends--
1051     }
1052   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1053     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1054       warning("AES instructions are not available on this CPU");
1055       FLAG_SET_DEFAULT(UseAES, false);
1056     }
1057     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1058       warning("AES intrinsics are not available on this CPU");
1059       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1060     }
1061     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1062       warning("AES-CTR intrinsics are not available on this CPU");
1063       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1064     }
1065   }
1066 
1067   // Use CLMUL instructions if available.
1068   if (supports_clmul()) {
1069     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1070       UseCLMUL = true;
1071     }
1072   } else if (UseCLMUL) {
1073     if (!FLAG_IS_DEFAULT(UseCLMUL))
1074       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1075     FLAG_SET_DEFAULT(UseCLMUL, false);
1076   }
1077 
1078   if (UseCLMUL && (UseSSE > 2)) {
1079     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1080       UseCRC32Intrinsics = true;
1081     }
1082   } else if (UseCRC32Intrinsics) {
1083     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1084       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1085     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1086   }
1087 
1088 #ifdef _LP64
1089   if (supports_avx2()) {
1090     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1091       UseAdler32Intrinsics = true;
1092     }
1093   } else if (UseAdler32Intrinsics) {
1094     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1095       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1096     }
1097     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1098   }
1099 #else
1100   if (UseAdler32Intrinsics) {
1101     warning("Adler32Intrinsics not available on this CPU.");
1102     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1103   }
1104 #endif
1105 
1106   if (supports_sse4_2() && supports_clmul()) {
1107     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1108       UseCRC32CIntrinsics = true;
1109     }
1110   } else if (UseCRC32CIntrinsics) {
1111     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1112       warning("CRC32C intrinsics are not available on this CPU");
1113     }
1114     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1115   }
1116 
1117   // GHASH/GCM intrinsics
1118   if (UseCLMUL && (UseSSE > 2)) {
1119     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1120       UseGHASHIntrinsics = true;
1121     }
1122   } else if (UseGHASHIntrinsics) {
1123     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1124       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1125     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1126   }
1127 
1128   // ChaCha20 Intrinsics
1129   // As long as the system supports AVX as a baseline we can do a
1130   // SIMD-enabled block function.  StubGenerator makes the determination
1131   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1132   // version.
1133   if (UseAVX >= 1) {
1134       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1135           UseChaCha20Intrinsics = true;
1136       }
1137   } else if (UseChaCha20Intrinsics) {
1138       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1139           warning("ChaCha20 intrinsic requires AVX instructions");
1140       }
1141       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1142   }
1143 
1144   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1145   if (UseAVX >= 2) {
1146     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1147       UseBASE64Intrinsics = true;
1148     }
1149   } else if (UseBASE64Intrinsics) {
1150      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1151       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1152     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1153   }
1154 
1155   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1156     if (FLAG_IS_DEFAULT(UseFMA)) {
1157       UseFMA = true;
1158     }
1159   } else if (UseFMA) {
1160     warning("FMA instructions are not available on this CPU");
1161     FLAG_SET_DEFAULT(UseFMA, false);
1162   }
1163 
1164   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1165     UseMD5Intrinsics = true;
1166   }
1167 
1168   if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
1169     if (FLAG_IS_DEFAULT(UseSHA)) {
1170       UseSHA = true;
1171     }
1172   } else if (UseSHA) {
1173     warning("SHA instructions are not available on this CPU");
1174     FLAG_SET_DEFAULT(UseSHA, false);
1175   }
1176 
1177   if (supports_sha() && supports_sse4_1() && UseSHA) {
1178     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1179       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1180     }
1181   } else if (UseSHA1Intrinsics) {
1182     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1183     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1184   }
1185 
1186   if (supports_sse4_1() && UseSHA) {
1187     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1188       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1189     }
1190   } else if (UseSHA256Intrinsics) {
1191     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1192     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1193   }
1194 
1195 #ifdef _LP64
1196   // These are only supported on 64-bit
1197   if (UseSHA && supports_avx2() && supports_bmi2()) {
1198     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1199       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1200     }
1201   } else
1202 #endif
1203   if (UseSHA512Intrinsics) {
1204     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1205     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1206   }
1207 
1208   if (UseSHA3Intrinsics) {
1209     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1210     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1211   }
1212 
1213   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1214     FLAG_SET_DEFAULT(UseSHA, false);
1215   }
1216 
1217   if (!supports_rtm() && UseRTMLocking) {
1218     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1219   }
1220 
1221 #if INCLUDE_RTM_OPT
1222   if (UseRTMLocking) {
1223     if (!CompilerConfig::is_c2_enabled()) {
1224       // Only C2 does RTM locking optimization.
1225       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1226     }
1227     if (is_intel_family_core()) {
1228       if ((_model == CPU_MODEL_HASWELL_E3) ||
1229           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1230           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1231         // currently a collision between SKL and HSW_E3
1232         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1233           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1234                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1235         } else {
1236           warning("UseRTMLocking is only available as experimental option on this platform.");
1237         }
1238       }
1239     }
1240     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1241       // RTM locking should be used only for applications with
1242       // high lock contention. For now we do not use it by default.
1243       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1244     }
1245   } else { // !UseRTMLocking
1246     if (UseRTMForStackLocks) {
1247       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1248         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1249       }
1250       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1251     }
1252     if (UseRTMDeopt) {
1253       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1254     }
1255     if (PrintPreciseRTMLockingStatistics) {
1256       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1257     }
1258   }
1259 #else
1260   if (UseRTMLocking) {
1261     // Only C2 does RTM locking optimization.
1262     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1263   }
1264 #endif
1265 
1266 #ifdef COMPILER2
1267   if (UseFPUForSpilling) {
1268     if (UseSSE < 2) {
1269       // Only supported with SSE2+
1270       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1271     }
1272   }
1273 #endif
1274 
1275 #if COMPILER2_OR_JVMCI
1276   int max_vector_size = 0;
1277   if (UseSSE < 2) {
1278     // Vectors (in XMM) are only supported with SSE2+
1279     // SSE is always 2 on x64.
1280     max_vector_size = 0;
1281   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1282     // 16 byte vectors (in XMM) are supported with SSE2+
1283     max_vector_size = 16;
1284   } else if (UseAVX == 1 || UseAVX == 2) {
1285     // 32 bytes vectors (in YMM) are only supported with AVX+
1286     max_vector_size = 32;
1287   } else if (UseAVX > 2) {
1288     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1289     max_vector_size = 64;
1290   }
1291 
1292 #ifdef _LP64
1293   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1294 #else
1295   int min_vector_size = 0;
1296 #endif
1297 
1298   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1299     if (MaxVectorSize < min_vector_size) {
1300       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1301       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1302     }
1303     if (MaxVectorSize > max_vector_size) {
1304       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1305       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1306     }
1307     if (!is_power_of_2(MaxVectorSize)) {
1308       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1309       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1310     }
1311   } else {
1312     // If default, use highest supported configuration
1313     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1314   }
1315 
1316 #if defined(COMPILER2) && defined(ASSERT)
1317   if (MaxVectorSize > 0) {
1318     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1319       tty->print_cr("State of YMM registers after signal handle:");
1320       int nreg = 2 LP64_ONLY(+2);
1321       const char* ymm_name[4] = {"0", "7", "8", "15"};
1322       for (int i = 0; i < nreg; i++) {
1323         tty->print("YMM%s:", ymm_name[i]);
1324         for (int j = 7; j >=0; j--) {
1325           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1326         }
1327         tty->cr();
1328       }
1329     }
1330   }
1331 #endif // COMPILER2 && ASSERT
1332 
1333 #ifdef _LP64
1334   if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
1335     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1336       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1337     }
1338   } else
1339 #endif
1340   if (UsePoly1305Intrinsics) {
1341     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1342     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1343   }
1344 
1345 #ifdef _LP64
1346   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1347     UseMultiplyToLenIntrinsic = true;
1348   }
1349   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1350     UseSquareToLenIntrinsic = true;
1351   }
1352   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1353     UseMulAddIntrinsic = true;
1354   }
1355   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1356     UseMontgomeryMultiplyIntrinsic = true;
1357   }
1358   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1359     UseMontgomerySquareIntrinsic = true;
1360   }
1361 #else
1362   if (UseMultiplyToLenIntrinsic) {
1363     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1364       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1365     }
1366     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1367   }
1368   if (UseMontgomeryMultiplyIntrinsic) {
1369     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1370       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1371     }
1372     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1373   }
1374   if (UseMontgomerySquareIntrinsic) {
1375     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1376       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1377     }
1378     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1379   }
1380   if (UseSquareToLenIntrinsic) {
1381     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1382       warning("squareToLen intrinsic is not available in 32-bit VM");
1383     }
1384     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1385   }
1386   if (UseMulAddIntrinsic) {
1387     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1388       warning("mulAdd intrinsic is not available in 32-bit VM");
1389     }
1390     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1391   }
1392 #endif // _LP64
1393 #endif // COMPILER2_OR_JVMCI
1394 
1395   // On new cpus instructions which update whole XMM register should be used
1396   // to prevent partial register stall due to dependencies on high half.
1397   //
1398   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1399   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1400   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1401   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1402 
1403 
1404   if (is_zx()) { // ZX cpus specific settings
1405     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1406       UseStoreImmI16 = false; // don't use it on ZX cpus
1407     }
1408     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1409       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1410         // Use it on all ZX cpus
1411         UseAddressNop = true;
1412       }
1413     }
1414     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1415       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1416     }
1417     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1418       if (supports_sse3()) {
1419         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1420       } else {
1421         UseXmmRegToRegMoveAll = false;
1422       }
1423     }
1424     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1425 #ifdef COMPILER2
1426       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1427         // For new ZX cpus do the next optimization:
1428         // don't align the beginning of a loop if there are enough instructions
1429         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1430         // in current fetch line (OptoLoopAlignment) or the padding
1431         // is big (> MaxLoopPad).
1432         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1433         // generated NOP instructions. 11 is the largest size of one
1434         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1435         MaxLoopPad = 11;
1436       }
1437 #endif // COMPILER2
1438       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1439         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1440       }
1441       if (supports_sse4_2()) { // new ZX cpus
1442         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1443           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1444         }
1445       }
1446       if (supports_sse4_2()) {
1447         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1448           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1449         }
1450       } else {
1451         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1452           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1453         }
1454         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1455       }
1456     }
1457 
1458     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1459       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1460     }
1461   }
1462 
1463   if (is_amd_family()) { // AMD cpus specific settings
1464     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1465       // Use it on new AMD cpus starting from Opteron.
1466       UseAddressNop = true;
1467     }
1468     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1469       // Use it on new AMD cpus starting from Opteron.
1470       UseNewLongLShift = true;
1471     }
1472     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1473       if (supports_sse4a()) {
1474         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1475       } else {
1476         UseXmmLoadAndClearUpper = false;
1477       }
1478     }
1479     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1480       if (supports_sse4a()) {
1481         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1482       } else {
1483         UseXmmRegToRegMoveAll = false;
1484       }
1485     }
1486     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1487       if (supports_sse4a()) {
1488         UseXmmI2F = true;
1489       } else {
1490         UseXmmI2F = false;
1491       }
1492     }
1493     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1494       if (supports_sse4a()) {
1495         UseXmmI2D = true;
1496       } else {
1497         UseXmmI2D = false;
1498       }
1499     }
1500     if (supports_sse4_2()) {
1501       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1502         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1503       }
1504     } else {
1505       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1506         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1507       }
1508       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1509     }
1510 
1511     // some defaults for AMD family 15h
1512     if (cpu_family() == 0x15) {
1513       // On family 15h processors default is no sw prefetch
1514       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1515         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1516       }
1517       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1518       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1519         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1520       }
1521       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1522       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1523         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1524       }
1525       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1526         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1527       }
1528     }
1529 
1530 #ifdef COMPILER2
1531     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1532       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1533       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1534     }
1535 #endif // COMPILER2
1536 
1537     // Some defaults for AMD family >= 17h && Hygon family 18h
1538     if (cpu_family() >= 0x17) {
1539       // On family >=17h processors use XMM and UnalignedLoadStores
1540       // for Array Copy
1541       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1542         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1543       }
1544       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1545         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1546       }
1547 #ifdef COMPILER2
1548       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1549         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1550       }
1551 #endif
1552     }
1553   }
1554 
1555   if (is_intel()) { // Intel cpus specific settings
1556     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1557       UseStoreImmI16 = false; // don't use it on Intel cpus
1558     }
1559     if (cpu_family() == 6 || cpu_family() == 15) {
1560       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1561         // Use it on all Intel cpus starting from PentiumPro
1562         UseAddressNop = true;
1563       }
1564     }
1565     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1566       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1567     }
1568     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1569       if (supports_sse3()) {
1570         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1571       } else {
1572         UseXmmRegToRegMoveAll = false;
1573       }
1574     }
1575     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1576 #ifdef COMPILER2
1577       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1578         // For new Intel cpus do the next optimization:
1579         // don't align the beginning of a loop if there are enough instructions
1580         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1581         // in current fetch line (OptoLoopAlignment) or the padding
1582         // is big (> MaxLoopPad).
1583         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1584         // generated NOP instructions. 11 is the largest size of one
1585         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1586         MaxLoopPad = 11;
1587       }
1588 #endif // COMPILER2
1589 
1590       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1591         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1592       }
1593       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1594         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1595           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1596         }
1597       }
1598       if (supports_sse4_2()) {
1599         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1600           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1601         }
1602       } else {
1603         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1604           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1605         }
1606         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1607       }
1608     }
1609     if (is_atom_family() || is_knights_family()) {
1610 #ifdef COMPILER2
1611       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1612         OptoScheduling = true;
1613       }
1614 #endif
1615       if (supports_sse4_2()) { // Silvermont
1616         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1617           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1618         }
1619       }
1620       if (FLAG_IS_DEFAULT(UseIncDec)) {
1621         FLAG_SET_DEFAULT(UseIncDec, false);
1622       }
1623     }
1624     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1625       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1626     }
1627 #ifdef COMPILER2
1628     if (UseAVX > 2) {
1629       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1630           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1631            ArrayOperationPartialInlineSize != 0 &&
1632            ArrayOperationPartialInlineSize != 16 &&
1633            ArrayOperationPartialInlineSize != 32 &&
1634            ArrayOperationPartialInlineSize != 64)) {
1635         int inline_size = 0;
1636         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1637           inline_size = 64;
1638         } else if (MaxVectorSize >= 32) {
1639           inline_size = 32;
1640         } else if (MaxVectorSize >= 16) {
1641           inline_size = 16;
1642         }
1643         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1644           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1645         }
1646         ArrayOperationPartialInlineSize = inline_size;
1647       }
1648 
1649       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1650         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1651         if (ArrayOperationPartialInlineSize) {
1652           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1653         } else {
1654           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1655         }
1656       }
1657     }
1658 #endif
1659   }
1660 
1661 #ifdef COMPILER2
1662   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1663     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1664       OptimizeFill = false;
1665     }
1666   }
1667 #endif
1668 
1669 #ifdef _LP64
1670   if (UseSSE42Intrinsics) {
1671     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1672       UseVectorizedMismatchIntrinsic = true;
1673     }
1674   } else if (UseVectorizedMismatchIntrinsic) {
1675     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1676       warning("vectorizedMismatch intrinsics are not available on this CPU");
1677     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1678   }
1679   if (UseAVX >= 2) {
1680     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1681   } else if (UseVectorizedHashCodeIntrinsic) {
1682     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1683       warning("vectorizedHashCode intrinsics are not available on this CPU");
1684     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1685   }
1686 #else
1687   if (UseVectorizedMismatchIntrinsic) {
1688     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1689       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1690     }
1691     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1692   }
1693   if (UseVectorizedHashCodeIntrinsic) {
1694     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1695       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1696     }
1697     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1698   }
1699 #endif // _LP64
1700 
1701   // Use count leading zeros count instruction if available.
1702   if (supports_lzcnt()) {
1703     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1704       UseCountLeadingZerosInstruction = true;
1705     }
1706    } else if (UseCountLeadingZerosInstruction) {
1707     warning("lzcnt instruction is not available on this CPU");
1708     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1709   }
1710 
1711   // Use count trailing zeros instruction if available
1712   if (supports_bmi1()) {
1713     // tzcnt does not require VEX prefix
1714     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1715       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1716         // Don't use tzcnt if BMI1 is switched off on command line.
1717         UseCountTrailingZerosInstruction = false;
1718       } else {
1719         UseCountTrailingZerosInstruction = true;
1720       }
1721     }
1722   } else if (UseCountTrailingZerosInstruction) {
1723     warning("tzcnt instruction is not available on this CPU");
1724     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1725   }
1726 
1727   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1728   // VEX prefix is generated only when AVX > 0.
1729   if (supports_bmi1() && supports_avx()) {
1730     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1731       UseBMI1Instructions = true;
1732     }
1733   } else if (UseBMI1Instructions) {
1734     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1735     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1736   }
1737 
1738   if (supports_bmi2() && supports_avx()) {
1739     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1740       UseBMI2Instructions = true;
1741     }
1742   } else if (UseBMI2Instructions) {
1743     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1744     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1745   }
1746 
1747   // Use population count instruction if available.
1748   if (supports_popcnt()) {
1749     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1750       UsePopCountInstruction = true;
1751     }
1752   } else if (UsePopCountInstruction) {
1753     warning("POPCNT instruction is not available on this CPU");
1754     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1755   }
1756 
1757   // Use fast-string operations if available.
1758   if (supports_erms()) {
1759     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1760       UseFastStosb = true;
1761     }
1762   } else if (UseFastStosb) {
1763     warning("fast-string operations are not available on this CPU");
1764     FLAG_SET_DEFAULT(UseFastStosb, false);
1765   }
1766 
1767   // For AMD Processors use XMM/YMM MOVDQU instructions
1768   // for Object Initialization as default
1769   if (is_amd() && cpu_family() >= 0x19) {
1770     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1771       UseFastStosb = false;
1772     }
1773   }
1774 
1775 #ifdef COMPILER2
1776   if (is_intel() && MaxVectorSize > 16) {
1777     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1778       UseFastStosb = false;
1779     }
1780   }
1781 #endif
1782 
1783   // Use XMM/YMM MOVDQU instruction for Object Initialization
1784   if (UseSSE >= 2 && UseUnalignedLoadStores) {
1785     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1786       UseXMMForObjInit = true;
1787     }
1788   } else if (UseXMMForObjInit) {
1789     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1790     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1791   }
1792 
1793 #ifdef COMPILER2
1794   if (FLAG_IS_DEFAULT(AlignVector)) {
1795     // Modern processors allow misaligned memory operations for vectors.
1796     AlignVector = !UseUnalignedLoadStores;
1797   }
1798 #endif // COMPILER2
1799 
1800   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1801     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1802       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1803     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1804       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1805     }
1806   }
1807 
1808   // Allocation prefetch settings
1809   int cache_line_size = checked_cast<int>(prefetch_data_size());
1810   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1811       (cache_line_size > AllocatePrefetchStepSize)) {
1812     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1813   }
1814 
1815   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1816     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1817     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1818       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1819     }
1820     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1821   }
1822 
1823   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1824     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1825     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1826   }
1827 
1828   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1829     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1830         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1831       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1832     }
1833 #ifdef COMPILER2
1834     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1835       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1836     }
1837 #endif
1838   }
1839 
1840   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1841 #ifdef COMPILER2
1842     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1843       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1844     }
1845 #endif
1846   }
1847 
1848 #ifdef _LP64
1849   // Prefetch settings
1850 
1851   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1852   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1853   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1854   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1855 
1856   // gc copy/scan is disabled if prefetchw isn't supported, because
1857   // Prefetch::write emits an inlined prefetchw on Linux.
1858   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1859   // The used prefetcht0 instruction works for both amd64 and em64t.
1860 
1861   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1862     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1863   }
1864   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1865     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1866   }
1867 #endif
1868 
1869   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1870      (cache_line_size > ContendedPaddingWidth))
1871      ContendedPaddingWidth = cache_line_size;
1872 
1873   // This machine allows unaligned memory accesses
1874   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1875     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1876   }
1877 
1878 #ifndef PRODUCT
1879   if (log_is_enabled(Info, os, cpu)) {
1880     LogStream ls(Log(os, cpu)::info());
1881     outputStream* log = &ls;
1882     log->print_cr("Logical CPUs per core: %u",
1883                   logical_processors_per_package());
1884     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1885     log->print("UseSSE=%d", UseSSE);
1886     if (UseAVX > 0) {
1887       log->print("  UseAVX=%d", UseAVX);
1888     }
1889     if (UseAES) {
1890       log->print("  UseAES=1");
1891     }
1892 #ifdef COMPILER2
1893     if (MaxVectorSize > 0) {
1894       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1895     }
1896 #endif
1897     log->cr();
1898     log->print("Allocation");
1899     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1900       log->print_cr(": no prefetching");
1901     } else {
1902       log->print(" prefetching: ");
1903       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1904         log->print("PREFETCHW");
1905       } else if (UseSSE >= 1) {
1906         if (AllocatePrefetchInstr == 0) {
1907           log->print("PREFETCHNTA");
1908         } else if (AllocatePrefetchInstr == 1) {
1909           log->print("PREFETCHT0");
1910         } else if (AllocatePrefetchInstr == 2) {
1911           log->print("PREFETCHT2");
1912         } else if (AllocatePrefetchInstr == 3) {
1913           log->print("PREFETCHW");
1914         }
1915       }
1916       if (AllocatePrefetchLines > 1) {
1917         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1918       } else {
1919         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1920       }
1921     }
1922 
1923     if (PrefetchCopyIntervalInBytes > 0) {
1924       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1925     }
1926     if (PrefetchScanIntervalInBytes > 0) {
1927       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1928     }
1929     if (ContendedPaddingWidth > 0) {
1930       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1931     }
1932   }
1933 #endif // !PRODUCT
1934   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1935       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1936   }
1937   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1938       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1939   }
1940 }
1941 
1942 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1943   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1944   if (vrt == XenHVM) {
1945     st->print_cr("Xen hardware-assisted virtualization detected");
1946   } else if (vrt == KVM) {
1947     st->print_cr("KVM virtualization detected");
1948   } else if (vrt == VMWare) {
1949     st->print_cr("VMWare virtualization detected");
1950     VirtualizationSupport::print_virtualization_info(st);
1951   } else if (vrt == HyperV) {
1952     st->print_cr("Hyper-V virtualization detected");
1953   } else if (vrt == HyperVRole) {
1954     st->print_cr("Hyper-V role detected");
1955   }
1956 }
1957 
1958 bool VM_Version::compute_has_intel_jcc_erratum() {
1959   if (!is_intel_family_core()) {
1960     // Only Intel CPUs are affected.
1961     return false;
1962   }
1963   // The following table of affected CPUs is based on the following document released by Intel:
1964   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1965   switch (_model) {
1966   case 0x8E:
1967     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1968     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1969     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1970     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1971     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1972     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1973     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1974     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1975     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1976     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1977   case 0x4E:
1978     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1979     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1980     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1981     return _stepping == 0x3;
1982   case 0x55:
1983     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1984     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1985     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1986     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1987     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1988     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1989     return _stepping == 0x4 || _stepping == 0x7;
1990   case 0x5E:
1991     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1992     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1993     return _stepping == 0x3;
1994   case 0x9E:
1995     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1996     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1997     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1998     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1999     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2000     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2001     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2002     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2003     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2004     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2005     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2006     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2007     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2008     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2009     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2010   case 0xA5:
2011     // Not in Intel documentation.
2012     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2013     return true;
2014   case 0xA6:
2015     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2016     return _stepping == 0x0;
2017   case 0xAE:
2018     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2019     return _stepping == 0xA;
2020   default:
2021     // If we are running on another intel machine not recognized in the table, we are okay.
2022     return false;
2023   }
2024 }
2025 
2026 // On Xen, the cpuid instruction returns
2027 //  eax / registers[0]: Version of Xen
2028 //  ebx / registers[1]: chars 'XenV'
2029 //  ecx / registers[2]: chars 'MMXe'
2030 //  edx / registers[3]: chars 'nVMM'
2031 //
2032 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2033 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2034 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2035 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2036 //
2037 // more information :
2038 // https://kb.vmware.com/s/article/1009458
2039 //
2040 void VM_Version::check_virtualizations() {
2041   uint32_t registers[4] = {0};
2042   char signature[13] = {0};
2043 
2044   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2045   // from 0x40000000 until 0x40010000.
2046   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2047   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2048     detect_virt_stub(leaf, registers);
2049     memcpy(signature, &registers[1], 12);
2050 
2051     if (strncmp("VMwareVMware", signature, 12) == 0) {
2052       Abstract_VM_Version::_detected_virtualization = VMWare;
2053       // check for extended metrics from guestlib
2054       VirtualizationSupport::initialize();
2055     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2056       Abstract_VM_Version::_detected_virtualization = HyperV;
2057 #ifdef _WINDOWS
2058       // CPUID leaf 0x40000007 is available to the root partition only.
2059       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2060       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2061       detect_virt_stub(0x40000007, registers);
2062       if ((registers[0] != 0x0) ||
2063           (registers[1] != 0x0) ||
2064           (registers[2] != 0x0) ||
2065           (registers[3] != 0x0)) {
2066         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2067       }
2068 #endif
2069     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2070       Abstract_VM_Version::_detected_virtualization = KVM;
2071     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2072       Abstract_VM_Version::_detected_virtualization = XenHVM;
2073     }
2074   }
2075 }
2076 
2077 #ifdef COMPILER2
2078 // Determine if it's running on Cascade Lake using default options.
2079 bool VM_Version::is_default_intel_cascade_lake() {
2080   return FLAG_IS_DEFAULT(UseAVX) &&
2081          FLAG_IS_DEFAULT(MaxVectorSize) &&
2082          UseAVX > 2 &&
2083          is_intel_cascade_lake();
2084 }
2085 #endif
2086 
2087 bool VM_Version::is_intel_cascade_lake() {
2088   return is_intel_skylake() && _stepping >= 5;
2089 }
2090 
2091 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2092 // for implementing the array copy and clear operations.
2093 // The Intel platforms that supports the serialize instruction
2094 // has improved implementation of 64-byte load/stores and so the default
2095 // threshold is set to 0 for these platforms.
2096 int VM_Version::avx3_threshold() {
2097   return (is_intel_family_core() &&
2098           supports_serialize() &&
2099           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2100 }
2101 
2102 static bool _vm_version_initialized = false;
2103 
2104 void VM_Version::initialize() {
2105   ResourceMark rm;
2106   // Making this stub must be FIRST use of assembler
2107   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2108   if (stub_blob == nullptr) {
2109     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2110   }
2111   CodeBuffer c(stub_blob);
2112   VM_Version_StubGenerator g(&c);
2113 
2114   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2115                                      g.generate_get_cpu_info());
2116   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2117                                      g.generate_detect_virt());
2118 
2119   get_processor_features();
2120 
2121   LP64_ONLY(Assembler::precompute_instructions();)
2122 
2123   if (VM_Version::supports_hv()) { // Supports hypervisor
2124     check_virtualizations();
2125   }
2126   _vm_version_initialized = true;
2127 }
2128 
2129 typedef enum {
2130    CPU_FAMILY_8086_8088  = 0,
2131    CPU_FAMILY_INTEL_286  = 2,
2132    CPU_FAMILY_INTEL_386  = 3,
2133    CPU_FAMILY_INTEL_486  = 4,
2134    CPU_FAMILY_PENTIUM    = 5,
2135    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2136    CPU_FAMILY_PENTIUM_4  = 0xF
2137 } FamilyFlag;
2138 
2139 typedef enum {
2140   RDTSCP_FLAG  = 0x08000000, // bit 27
2141   INTEL64_FLAG = 0x20000000  // bit 29
2142 } _featureExtendedEdxFlag;
2143 
2144 typedef enum {
2145    FPU_FLAG     = 0x00000001,
2146    VME_FLAG     = 0x00000002,
2147    DE_FLAG      = 0x00000004,
2148    PSE_FLAG     = 0x00000008,
2149    TSC_FLAG     = 0x00000010,
2150    MSR_FLAG     = 0x00000020,
2151    PAE_FLAG     = 0x00000040,
2152    MCE_FLAG     = 0x00000080,
2153    CX8_FLAG     = 0x00000100,
2154    APIC_FLAG    = 0x00000200,
2155    SEP_FLAG     = 0x00000800,
2156    MTRR_FLAG    = 0x00001000,
2157    PGE_FLAG     = 0x00002000,
2158    MCA_FLAG     = 0x00004000,
2159    CMOV_FLAG    = 0x00008000,
2160    PAT_FLAG     = 0x00010000,
2161    PSE36_FLAG   = 0x00020000,
2162    PSNUM_FLAG   = 0x00040000,
2163    CLFLUSH_FLAG = 0x00080000,
2164    DTS_FLAG     = 0x00200000,
2165    ACPI_FLAG    = 0x00400000,
2166    MMX_FLAG     = 0x00800000,
2167    FXSR_FLAG    = 0x01000000,
2168    SSE_FLAG     = 0x02000000,
2169    SSE2_FLAG    = 0x04000000,
2170    SS_FLAG      = 0x08000000,
2171    HTT_FLAG     = 0x10000000,
2172    TM_FLAG      = 0x20000000
2173 } FeatureEdxFlag;
2174 
2175 static BufferBlob* cpuid_brand_string_stub_blob;
2176 static const int   cpuid_brand_string_stub_size = 550;
2177 
2178 extern "C" {
2179   typedef void (*getCPUIDBrandString_stub_t)(void*);
2180 }
2181 
2182 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2183 
2184 // VM_Version statics
2185 enum {
2186   ExtendedFamilyIdLength_INTEL = 16,
2187   ExtendedFamilyIdLength_AMD   = 24
2188 };
2189 
2190 const size_t VENDOR_LENGTH = 13;
2191 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2192 static char* _cpu_brand_string = nullptr;
2193 static int64_t _max_qualified_cpu_frequency = 0;
2194 
2195 static int _no_of_threads = 0;
2196 static int _no_of_cores = 0;
2197 
2198 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2199   "8086/8088",
2200   "",
2201   "286",
2202   "386",
2203   "486",
2204   "Pentium",
2205   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2206   "",
2207   "",
2208   "",
2209   "",
2210   "",
2211   "",
2212   "",
2213   "",
2214   "Pentium 4"
2215 };
2216 
2217 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2218   "",
2219   "",
2220   "",
2221   "",
2222   "5x86",
2223   "K5/K6",
2224   "Athlon/AthlonXP",
2225   "",
2226   "",
2227   "",
2228   "",
2229   "",
2230   "",
2231   "",
2232   "",
2233   "Opteron/Athlon64",
2234   "Opteron QC/Phenom",  // Barcelona et.al.
2235   "",
2236   "",
2237   "",
2238   "",
2239   "",
2240   "",
2241   "Zen"
2242 };
2243 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2244 // September 2013, Vol 3C Table 35-1
2245 const char* const _model_id_pentium_pro[] = {
2246   "",
2247   "Pentium Pro",
2248   "",
2249   "Pentium II model 3",
2250   "",
2251   "Pentium II model 5/Xeon/Celeron",
2252   "Celeron",
2253   "Pentium III/Pentium III Xeon",
2254   "Pentium III/Pentium III Xeon",
2255   "Pentium M model 9",    // Yonah
2256   "Pentium III, model A",
2257   "Pentium III, model B",
2258   "",
2259   "Pentium M model D",    // Dothan
2260   "",
2261   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2262   "",
2263   "",
2264   "",
2265   "",
2266   "",
2267   "",
2268   "Celeron",              // 0x16 Celeron 65nm
2269   "Core 2",               // 0x17 Penryn / Harpertown
2270   "",
2271   "",
2272   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2273   "Atom",                 // 0x1B Z5xx series Silverthorn
2274   "",
2275   "Core 2",               // 0x1D Dunnington (6-core)
2276   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2277   "",
2278   "",
2279   "",
2280   "",
2281   "",
2282   "",
2283   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2284   "",
2285   "",
2286   "",                     // 0x28
2287   "",
2288   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2289   "",
2290   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2291   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2292   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2293   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2294   "",
2295   "",
2296   "",
2297   "",
2298   "",
2299   "",
2300   "",
2301   "",
2302   "",
2303   "",
2304   "Ivy Bridge",           // 0x3a
2305   "",
2306   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2307   "",                     // 0x3d "Next Generation Intel Core Processor"
2308   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2309   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2310   "",
2311   "",
2312   "",
2313   "",
2314   "",
2315   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2316   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2317   nullptr
2318 };
2319 
2320 /* Brand ID is for back compatibility
2321  * Newer CPUs uses the extended brand string */
2322 const char* const _brand_id[] = {
2323   "",
2324   "Celeron processor",
2325   "Pentium III processor",
2326   "Intel Pentium III Xeon processor",
2327   "",
2328   "",
2329   "",
2330   "",
2331   "Intel Pentium 4 processor",
2332   nullptr
2333 };
2334 
2335 
2336 const char* const _feature_edx_id[] = {
2337   "On-Chip FPU",
2338   "Virtual Mode Extensions",
2339   "Debugging Extensions",
2340   "Page Size Extensions",
2341   "Time Stamp Counter",
2342   "Model Specific Registers",
2343   "Physical Address Extension",
2344   "Machine Check Exceptions",
2345   "CMPXCHG8B Instruction",
2346   "On-Chip APIC",
2347   "",
2348   "Fast System Call",
2349   "Memory Type Range Registers",
2350   "Page Global Enable",
2351   "Machine Check Architecture",
2352   "Conditional Mov Instruction",
2353   "Page Attribute Table",
2354   "36-bit Page Size Extension",
2355   "Processor Serial Number",
2356   "CLFLUSH Instruction",
2357   "",
2358   "Debug Trace Store feature",
2359   "ACPI registers in MSR space",
2360   "Intel Architecture MMX Technology",
2361   "Fast Float Point Save and Restore",
2362   "Streaming SIMD extensions",
2363   "Streaming SIMD extensions 2",
2364   "Self-Snoop",
2365   "Hyper Threading",
2366   "Thermal Monitor",
2367   "",
2368   "Pending Break Enable"
2369 };
2370 
2371 const char* const _feature_extended_edx_id[] = {
2372   "",
2373   "",
2374   "",
2375   "",
2376   "",
2377   "",
2378   "",
2379   "",
2380   "",
2381   "",
2382   "",
2383   "SYSCALL/SYSRET",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "",
2392   "Execute Disable Bit",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "",
2399   "RDTSCP",
2400   "",
2401   "Intel 64 Architecture",
2402   "",
2403   ""
2404 };
2405 
2406 const char* const _feature_ecx_id[] = {
2407   "Streaming SIMD Extensions 3",
2408   "PCLMULQDQ",
2409   "64-bit DS Area",
2410   "MONITOR/MWAIT instructions",
2411   "CPL Qualified Debug Store",
2412   "Virtual Machine Extensions",
2413   "Safer Mode Extensions",
2414   "Enhanced Intel SpeedStep technology",
2415   "Thermal Monitor 2",
2416   "Supplemental Streaming SIMD Extensions 3",
2417   "L1 Context ID",
2418   "",
2419   "Fused Multiply-Add",
2420   "CMPXCHG16B",
2421   "xTPR Update Control",
2422   "Perfmon and Debug Capability",
2423   "",
2424   "Process-context identifiers",
2425   "Direct Cache Access",
2426   "Streaming SIMD extensions 4.1",
2427   "Streaming SIMD extensions 4.2",
2428   "x2APIC",
2429   "MOVBE",
2430   "Popcount instruction",
2431   "TSC-Deadline",
2432   "AESNI",
2433   "XSAVE",
2434   "OSXSAVE",
2435   "AVX",
2436   "F16C",
2437   "RDRAND",
2438   ""
2439 };
2440 
2441 const char* const _feature_extended_ecx_id[] = {
2442   "LAHF/SAHF instruction support",
2443   "Core multi-processor legacy mode",
2444   "",
2445   "",
2446   "",
2447   "Advanced Bit Manipulations: LZCNT",
2448   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2449   "Misaligned SSE mode",
2450   "",
2451   "",
2452   "",
2453   "",
2454   "",
2455   "",
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   ""
2474 };
2475 
2476 void VM_Version::initialize_tsc(void) {
2477   ResourceMark rm;
2478 
2479   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2480   if (cpuid_brand_string_stub_blob == nullptr) {
2481     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2482   }
2483   CodeBuffer c(cpuid_brand_string_stub_blob);
2484   VM_Version_StubGenerator g(&c);
2485   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2486                                    g.generate_getCPUIDBrandString());
2487 }
2488 
2489 const char* VM_Version::cpu_model_description(void) {
2490   uint32_t cpu_family = extended_cpu_family();
2491   uint32_t cpu_model = extended_cpu_model();
2492   const char* model = nullptr;
2493 
2494   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2495     for (uint32_t i = 0; i <= cpu_model; i++) {
2496       model = _model_id_pentium_pro[i];
2497       if (model == nullptr) {
2498         break;
2499       }
2500     }
2501   }
2502   return model;
2503 }
2504 
2505 const char* VM_Version::cpu_brand_string(void) {
2506   if (_cpu_brand_string == nullptr) {
2507     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2508     if (nullptr == _cpu_brand_string) {
2509       return nullptr;
2510     }
2511     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2512     if (ret_val != OS_OK) {
2513       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2514       _cpu_brand_string = nullptr;
2515     }
2516   }
2517   return _cpu_brand_string;
2518 }
2519 
2520 const char* VM_Version::cpu_brand(void) {
2521   const char*  brand  = nullptr;
2522 
2523   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2524     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2525     brand = _brand_id[0];
2526     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2527       brand = _brand_id[i];
2528     }
2529   }
2530   return brand;
2531 }
2532 
2533 bool VM_Version::cpu_is_em64t(void) {
2534   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2535 }
2536 
2537 bool VM_Version::is_netburst(void) {
2538   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2539 }
2540 
2541 bool VM_Version::supports_tscinv_ext(void) {
2542   if (!supports_tscinv_bit()) {
2543     return false;
2544   }
2545 
2546   if (is_intel()) {
2547     return true;
2548   }
2549 
2550   if (is_amd()) {
2551     return !is_amd_Barcelona();
2552   }
2553 
2554   if (is_hygon()) {
2555     return true;
2556   }
2557 
2558   return false;
2559 }
2560 
2561 void VM_Version::resolve_cpu_information_details(void) {
2562 
2563   // in future we want to base this information on proper cpu
2564   // and cache topology enumeration such as:
2565   // Intel 64 Architecture Processor Topology Enumeration
2566   // which supports system cpu and cache topology enumeration
2567   // either using 2xAPICIDs or initial APICIDs
2568 
2569   // currently only rough cpu information estimates
2570   // which will not necessarily reflect the exact configuration of the system
2571 
2572   // this is the number of logical hardware threads
2573   // visible to the operating system
2574   _no_of_threads = os::processor_count();
2575 
2576   // find out number of threads per cpu package
2577   int threads_per_package = threads_per_core() * cores_per_cpu();
2578 
2579   // use amount of threads visible to the process in order to guess number of sockets
2580   _no_of_sockets = _no_of_threads / threads_per_package;
2581 
2582   // process might only see a subset of the total number of threads
2583   // from a single processor package. Virtualization/resource management for example.
2584   // If so then just write a hard 1 as num of pkgs.
2585   if (0 == _no_of_sockets) {
2586     _no_of_sockets = 1;
2587   }
2588 
2589   // estimate the number of cores
2590   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2591 }
2592 
2593 
2594 const char* VM_Version::cpu_family_description(void) {
2595   int cpu_family_id = extended_cpu_family();
2596   if (is_amd()) {
2597     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2598       return _family_id_amd[cpu_family_id];
2599     }
2600   }
2601   if (is_intel()) {
2602     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2603       return cpu_model_description();
2604     }
2605     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2606       return _family_id_intel[cpu_family_id];
2607     }
2608   }
2609   if (is_hygon()) {
2610     return "Dhyana";
2611   }
2612   return "Unknown x86";
2613 }
2614 
2615 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2616   assert(buf != nullptr, "buffer is null!");
2617   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2618 
2619   const char* cpu_type = nullptr;
2620   const char* x64 = nullptr;
2621 
2622   if (is_intel()) {
2623     cpu_type = "Intel";
2624     x64 = cpu_is_em64t() ? " Intel64" : "";
2625   } else if (is_amd()) {
2626     cpu_type = "AMD";
2627     x64 = cpu_is_em64t() ? " AMD64" : "";
2628   } else if (is_hygon()) {
2629     cpu_type = "Hygon";
2630     x64 = cpu_is_em64t() ? " AMD64" : "";
2631   } else {
2632     cpu_type = "Unknown x86";
2633     x64 = cpu_is_em64t() ? " x86_64" : "";
2634   }
2635 
2636   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2637     cpu_type,
2638     cpu_family_description(),
2639     supports_ht() ? " (HT)" : "",
2640     supports_sse3() ? " SSE3" : "",
2641     supports_ssse3() ? " SSSE3" : "",
2642     supports_sse4_1() ? " SSE4.1" : "",
2643     supports_sse4_2() ? " SSE4.2" : "",
2644     supports_sse4a() ? " SSE4A" : "",
2645     is_netburst() ? " Netburst" : "",
2646     is_intel_family_core() ? " Core" : "",
2647     x64);
2648 
2649   return OS_OK;
2650 }
2651 
2652 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2653   assert(buf != nullptr, "buffer is null!");
2654   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2655   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2656 
2657   // invoke newly generated asm code to fetch CPU Brand String
2658   getCPUIDBrandString_stub(&_cpuid_info);
2659 
2660   // fetch results into buffer
2661   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2662   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2663   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2664   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2665   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2666   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2667   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2668   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2669   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2670   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2671   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2672   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2673 
2674   return OS_OK;
2675 }
2676 
2677 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2678   guarantee(buf != nullptr, "buffer is null!");
2679   guarantee(buf_len > 0, "buffer len not enough!");
2680 
2681   unsigned int flag = 0;
2682   unsigned int fi = 0;
2683   size_t       written = 0;
2684   const char*  prefix = "";
2685 
2686 #define WRITE_TO_BUF(string)                                                          \
2687   {                                                                                   \
2688     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2689     if (res < 0) {                                                                    \
2690       return buf_len - 1;                                                             \
2691     }                                                                                 \
2692     written += res;                                                                   \
2693     if (prefix[0] == '\0') {                                                          \
2694       prefix = ", ";                                                                  \
2695     }                                                                                 \
2696   }
2697 
2698   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2699     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2700       continue; /* no hyperthreading */
2701     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2702       continue; /* no fast system call */
2703     }
2704     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2705       WRITE_TO_BUF(_feature_edx_id[fi]);
2706     }
2707   }
2708 
2709   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2710     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2711       WRITE_TO_BUF(_feature_ecx_id[fi]);
2712     }
2713   }
2714 
2715   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2716     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2717       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2718     }
2719   }
2720 
2721   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2722     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2723       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2724     }
2725   }
2726 
2727   if (supports_tscinv_bit()) {
2728       WRITE_TO_BUF("Invariant TSC");
2729   }
2730 
2731   return written;
2732 }
2733 
2734 /**
2735  * Write a detailed description of the cpu to a given buffer, including
2736  * feature set.
2737  */
2738 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2739   assert(buf != nullptr, "buffer is null!");
2740   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2741 
2742   static const char* unknown = "<unknown>";
2743   char               vendor_id[VENDOR_LENGTH];
2744   const char*        family = nullptr;
2745   const char*        model = nullptr;
2746   const char*        brand = nullptr;
2747   int                outputLen = 0;
2748 
2749   family = cpu_family_description();
2750   if (family == nullptr) {
2751     family = unknown;
2752   }
2753 
2754   model = cpu_model_description();
2755   if (model == nullptr) {
2756     model = unknown;
2757   }
2758 
2759   brand = cpu_brand_string();
2760 
2761   if (brand == nullptr) {
2762     brand = cpu_brand();
2763     if (brand == nullptr) {
2764       brand = unknown;
2765     }
2766   }
2767 
2768   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2769   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2770   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2771   vendor_id[VENDOR_LENGTH-1] = '\0';
2772 
2773   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2774     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2775     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2776     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2777     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2778     "Supports: ",
2779     brand,
2780     vendor_id,
2781     family,
2782     extended_cpu_family(),
2783     model,
2784     extended_cpu_model(),
2785     cpu_stepping(),
2786     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2787     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2788     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2789     _cpuid_info.std_cpuid1_eax.value,
2790     _cpuid_info.std_cpuid1_ebx.value,
2791     _cpuid_info.std_cpuid1_ecx.value,
2792     _cpuid_info.std_cpuid1_edx.value,
2793     _cpuid_info.ext_cpuid1_eax,
2794     _cpuid_info.ext_cpuid1_ebx,
2795     _cpuid_info.ext_cpuid1_ecx,
2796     _cpuid_info.ext_cpuid1_edx);
2797 
2798   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2799     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2800     return OS_ERR;
2801   }
2802 
2803   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2804 
2805   return OS_OK;
2806 }
2807 
2808 
2809 // Fill in Abstract_VM_Version statics
2810 void VM_Version::initialize_cpu_information() {
2811   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2812   assert(!_initialized, "shouldn't be initialized yet");
2813   resolve_cpu_information_details();
2814 
2815   // initialize cpu_name and cpu_desc
2816   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2817   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2818   _initialized = true;
2819 }
2820 
2821 /**
2822  *  For information about extracting the frequency from the cpu brand string, please see:
2823  *
2824  *    Intel Processor Identification and the CPUID Instruction
2825  *    Application Note 485
2826  *    May 2012
2827  *
2828  * The return value is the frequency in Hz.
2829  */
2830 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2831   const char* const brand_string = cpu_brand_string();
2832   if (brand_string == nullptr) {
2833     return 0;
2834   }
2835   const int64_t MEGA = 1000000;
2836   int64_t multiplier = 0;
2837   int64_t frequency = 0;
2838   uint8_t idx = 0;
2839   // The brand string buffer is at most 48 bytes.
2840   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2841   for (; idx < 48-2; ++idx) {
2842     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2843     // Search brand string for "yHz" where y is M, G, or T.
2844     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2845       if (brand_string[idx] == 'M') {
2846         multiplier = MEGA;
2847       } else if (brand_string[idx] == 'G') {
2848         multiplier = MEGA * 1000;
2849       } else if (brand_string[idx] == 'T') {
2850         multiplier = MEGA * MEGA;
2851       }
2852       break;
2853     }
2854   }
2855   if (multiplier > 0) {
2856     // Compute frequency (in Hz) from brand string.
2857     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2858       frequency =  (brand_string[idx-4] - '0') * multiplier;
2859       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2860       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2861     } else { // format is "xxxx"
2862       frequency =  (brand_string[idx-4] - '0') * 1000;
2863       frequency += (brand_string[idx-3] - '0') * 100;
2864       frequency += (brand_string[idx-2] - '0') * 10;
2865       frequency += (brand_string[idx-1] - '0');
2866       frequency *= multiplier;
2867     }
2868   }
2869   return frequency;
2870 }
2871 
2872 
2873 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2874   if (_max_qualified_cpu_frequency == 0) {
2875     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2876   }
2877   return _max_qualified_cpu_frequency;
2878 }
2879 
2880 uint64_t VM_Version::feature_flags() {
2881   uint64_t result = 0;
2882   if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
2883     result |= CPU_CX8;
2884   if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
2885     result |= CPU_CMOV;
2886   if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
2887     result |= CPU_FLUSH;
2888 #ifdef _LP64
2889   // clflush should always be available on x86_64
2890   // if not we are in real trouble because we rely on it
2891   // to flush the code cache.
2892   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2893 #endif
2894   if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2895       _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
2896     result |= CPU_FXSR;
2897   // HT flag is set for multi-core processors also.
2898   if (threads_per_core() > 1)
2899     result |= CPU_HT;
2900   if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2901       _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
2902     result |= CPU_MMX;
2903   if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
2904     result |= CPU_SSE;
2905   if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
2906     result |= CPU_SSE2;
2907   if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
2908     result |= CPU_SSE3;
2909   if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
2910     result |= CPU_SSSE3;
2911   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
2912     result |= CPU_SSE4_1;
2913   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
2914     result |= CPU_SSE4_2;
2915   if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
2916     result |= CPU_POPCNT;
2917   if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
2918       _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
2919       _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
2920       _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
2921     result |= CPU_AVX;
2922     result |= CPU_VZEROUPPER;
2923     if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
2924       result |= CPU_F16C;
2925     if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
2926       result |= CPU_AVX2;
2927     if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
2928         _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
2929         _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
2930         _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
2931       result |= CPU_AVX512F;
2932       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
2933         result |= CPU_AVX512CD;
2934       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
2935         result |= CPU_AVX512DQ;
2936       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0)
2937         result |= CPU_AVX512_IFMA;
2938       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
2939         result |= CPU_AVX512PF;
2940       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
2941         result |= CPU_AVX512ER;
2942       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
2943         result |= CPU_AVX512BW;
2944       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
2945         result |= CPU_AVX512VL;
2946       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2947         result |= CPU_AVX512_VPOPCNTDQ;
2948       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2949         result |= CPU_AVX512_VPCLMULQDQ;
2950       if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
2951         result |= CPU_AVX512_VAES;
2952       if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
2953         result |= CPU_GFNI;
2954       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
2955         result |= CPU_AVX512_VNNI;
2956       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2957         result |= CPU_AVX512_BITALG;
2958       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2959         result |= CPU_AVX512_VBMI;
2960       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2961         result |= CPU_AVX512_VBMI2;
2962     }
2963   }
2964   if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
2965     result |= CPU_HV;
2966   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
2967     result |= CPU_BMI1;
2968   if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
2969     result |= CPU_TSC;
2970   if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
2971     result |= CPU_TSCINV_BIT;
2972   if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
2973     result |= CPU_AES;
2974   if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
2975     result |= CPU_ERMS;
2976   if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2977     result |= CPU_FSRM;
2978   if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
2979     result |= CPU_CLMUL;
2980   if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
2981     result |= CPU_RTM;
2982   if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
2983      result |= CPU_ADX;
2984   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
2985     result |= CPU_BMI2;
2986   if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
2987     result |= CPU_SHA;
2988   if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
2989     result |= CPU_FMA;
2990   if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
2991     result |= CPU_FLUSHOPT;
2992   if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
2993     result |= CPU_RDTSCP;
2994   if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
2995     result |= CPU_RDPID;
2996 
2997   // AMD|Hygon features.
2998   if (is_amd_family()) {
2999     if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
3000         (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
3001       result |= CPU_3DNOW_PREFETCH;
3002     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
3003       result |= CPU_LZCNT;
3004     if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
3005       result |= CPU_SSE4A;
3006   }
3007 
3008   // Intel features.
3009   if (is_intel()) {
3010     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3011       result |= CPU_LZCNT;
3012     }
3013     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3014       result |= CPU_3DNOW_PREFETCH;
3015     }
3016     if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
3017       result |= CPU_CLWB;
3018     }
3019     if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
3020       result |= CPU_SERIALIZE;
3021   }
3022 
3023   // ZX features.
3024   if (is_zx()) {
3025     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3026       result |= CPU_LZCNT;
3027     }
3028     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3029       result |= CPU_3DNOW_PREFETCH;
3030     }
3031   }
3032 
3033   // Protection key features.
3034   if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) {
3035     result |= CPU_PKU;
3036   }
3037   if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) {
3038     result |= CPU_OSPKE;
3039   }
3040 
3041   // Control flow enforcement (CET) features.
3042   if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) {
3043     result |= CPU_CET_SS;
3044   }
3045   if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) {
3046     result |= CPU_CET_IBT;
3047   }
3048 
3049   // Composite features.
3050   if (supports_tscinv_bit() &&
3051       ((is_amd_family() && !is_amd_Barcelona()) ||
3052        is_intel_tsc_synched_at_init())) {
3053     result |= CPU_TSCINV;
3054   }
3055 
3056   return result;
3057 }
3058 
3059 bool VM_Version::os_supports_avx_vectors() {
3060   bool retVal = false;
3061   int nreg = 2 LP64_ONLY(+2);
3062   if (supports_evex()) {
3063     // Verify that OS save/restore all bits of EVEX registers
3064     // during signal processing.
3065     retVal = true;
3066     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3067       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3068         retVal = false;
3069         break;
3070       }
3071     }
3072   } else if (supports_avx()) {
3073     // Verify that OS save/restore all bits of AVX registers
3074     // during signal processing.
3075     retVal = true;
3076     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3077       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3078         retVal = false;
3079         break;
3080       }
3081     }
3082     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3083     if (retVal == false) {
3084       // Verify that OS save/restore all bits of EVEX registers
3085       // during signal processing.
3086       retVal = true;
3087       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3088         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3089           retVal = false;
3090           break;
3091         }
3092       }
3093     }
3094   }
3095   return retVal;
3096 }
3097 
3098 uint VM_Version::cores_per_cpu() {
3099   uint result = 1;
3100   if (is_intel()) {
3101     bool supports_topology = supports_processor_topology();
3102     if (supports_topology) {
3103       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3104                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3105     }
3106     if (!supports_topology || result == 0) {
3107       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3108     }
3109   } else if (is_amd_family()) {
3110     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3111   } else if (is_zx()) {
3112     bool supports_topology = supports_processor_topology();
3113     if (supports_topology) {
3114       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3115                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3116     }
3117     if (!supports_topology || result == 0) {
3118       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3119     }
3120   }
3121   return result;
3122 }
3123 
3124 uint VM_Version::threads_per_core() {
3125   uint result = 1;
3126   if (is_intel() && supports_processor_topology()) {
3127     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3128   } else if (is_zx() && supports_processor_topology()) {
3129     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3130   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3131     if (cpu_family() >= 0x17) {
3132       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3133     } else {
3134       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3135                  cores_per_cpu();
3136     }
3137   }
3138   return (result == 0 ? 1 : result);
3139 }
3140 
3141 uint VM_Version::L1_line_size() {
3142   uint result = 0;
3143   if (is_intel()) {
3144     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3145   } else if (is_amd_family()) {
3146     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3147   } else if (is_zx()) {
3148     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3149   }
3150   if (result < 32) // not defined ?
3151     result = 32;   // 32 bytes by default on x86 and other x64
3152   return result;
3153 }
3154 
3155 bool VM_Version::is_intel_tsc_synched_at_init() {
3156   if (is_intel_family_core()) {
3157     uint32_t ext_model = extended_cpu_model();
3158     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3159         ext_model == CPU_MODEL_WESTMERE_EP    ||
3160         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3161         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3162       // <= 2-socket invariant tsc support. EX versions are usually used
3163       // in > 2-socket systems and likely don't synchronize tscs at
3164       // initialization.
3165       // Code that uses tsc values must be prepared for them to arbitrarily
3166       // jump forward or backward.
3167       return true;
3168     }
3169   }
3170   return false;
3171 }
3172 
3173 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3174   // Hardware prefetching (distance/size in bytes):
3175   // Pentium 3 -  64 /  32
3176   // Pentium 4 - 256 / 128
3177   // Athlon    -  64 /  32 ????
3178   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3179   // Core      - 128 /  64
3180   //
3181   // Software prefetching (distance in bytes / instruction with best score):
3182   // Pentium 3 - 128 / prefetchnta
3183   // Pentium 4 - 512 / prefetchnta
3184   // Athlon    - 128 / prefetchnta
3185   // Opteron   - 256 / prefetchnta
3186   // Core      - 256 / prefetchnta
3187   // It will be used only when AllocatePrefetchStyle > 0
3188 
3189   if (is_amd_family()) { // AMD | Hygon
3190     if (supports_sse2()) {
3191       return 256; // Opteron
3192     } else {
3193       return 128; // Athlon
3194     }
3195   } else { // Intel
3196     if (supports_sse3() && cpu_family() == 6) {
3197       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3198         return 192;
3199       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3200 #ifdef _LP64
3201         return 384;
3202 #else
3203         return 320;
3204 #endif
3205       }
3206     }
3207     if (supports_sse2()) {
3208       if (cpu_family() == 6) {
3209         return 256; // Pentium M, Core, Core2
3210       } else {
3211         return 512; // Pentium 4
3212       }
3213     } else {
3214       return 128; // Pentium 3 (and all other old CPUs)
3215     }
3216   }
3217 }
3218 
3219 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3220   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3221   switch (id) {
3222   case vmIntrinsics::_floatToFloat16:
3223   case vmIntrinsics::_float16ToFloat:
3224     if (!supports_float16()) {
3225       return false;
3226     }
3227     break;
3228   default:
3229     break;
3230   }
3231   return true;
3232 }
3233