1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/powerOfTwo.hpp"
  42 #include "utilities/virtualizationSupport.hpp"
  43 
// Raw processor identification, decoded from the CPUID data below.
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
// Set when the processor is subject to the Intel JCC (jump conditional code) erratum.
bool VM_Version::_has_intel_jcc_erratum;
// Zero-initialized buffer that the generated get_cpu_info stub fills in.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  49 
  50 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  51 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  52 #undef DECLARE_CPU_FEATURE_FLAG
  53 
// Addresses used by the signal handler during the save/restore probes below:
// the faulting instruction and the instruction to resume at after the signal.
// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code blob backing the generated CPU-detection stubs (allocation is done
// outside this chunk); stub_size is its capacity in bytes.
static BufferBlob* stub_blob;
static const int stub_size = 2000;

// C-callable signatures for the generated stubs.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
// Entry points, filled in once the corresponding stubs have been generated.
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  74 
  75 #ifdef _LP64
  76 
bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  // Always returns true; in debug builds the assert additionally verifies
  // that the CPU_FLUSH feature bit is set once the VM is fully initialized.
  assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
  90 #endif
  91 
  92 #define CPUID_STANDARD_FN   0x0
  93 #define CPUID_STANDARD_FN_1 0x1
  94 #define CPUID_STANDARD_FN_4 0x4
  95 #define CPUID_STANDARD_FN_B 0xb
  96 
  97 #define CPUID_EXTENDED_FN   0x80000000
  98 #define CPUID_EXTENDED_FN_1 0x80000001
  99 #define CPUID_EXTENDED_FN_2 0x80000002
 100 #define CPUID_EXTENDED_FN_3 0x80000003
 101 #define CPUID_EXTENDED_FN_4 0x80000004
 102 #define CPUID_EXTENDED_FN_7 0x80000007
 103 #define CPUID_EXTENDED_FN_8 0x80000008
 104 
 105 class VM_Version_StubGenerator: public StubCodeGenerator {
 106  public:
 107 
 108   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 109 
 110 #if defined(_LP64)
  // Generates a stub that zeroes the APX extended GPRs r16 and r31.
  // It is run before the APX save/restore probe so that non-zero values
  // observed after the signal must have been restored by the OS rather
  // than simply left untouched.
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    // UseAPX is flipped only while emitting code, so the assembler accepts
    // the extended-GPR operands; it is restored before returning.
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
 129 #endif
 130 
  // Generates the main CPU-detection stub. The stub fills the caller-supplied
  // CpuidInfo structure with the raw results of the standard, topology and
  // extended CPUID leaves, reads XCR0, and runs SEGV-based probes that verify
  // the OS actually restores extended register state (YMM/ZMM, and APX EGPRs
  // on 64-bit) across signal handling.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    // NOTE(review): apx_save_restore_warning is declared but never bound or
    // referenced in this stub.
    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows
    //
    // rbx, rsi and the flags register are saved here and restored at wrapup.

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    // The leaves below fall through from highest to lowest, so each leaf
    // reached here also records all lower extended leaves.
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    // APX probe: write a known test value into r16/r31, take a deliberate
    // SEGV, and save the post-signal values so feature setup can check that
    // the OS restored the extended GPR state.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS has enabled sse | ymm state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    // The Use* flags below are flipped only while emitting code so the
    // assembler accepts AVX/EVEX encodings; they are restored before return.
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
      // Restore xmm registers saved above, in reverse push order.
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
    // Restore xmm registers saved above, in reverse push order.
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits a vzeroupper for Intel CPUs, except Knights-family Xeon Phi models
  // where it is skipped; non-Intel CPUs and Xeon Phi branch straight to
  // L_wrapup without emitting it.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    // Mask family/model bits of the cpuid(1) signature for the model compare.
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Generates a stub that executes CPUID with a caller-supplied leaf and
  // stores eax/ebx/ecx/edx into the caller-supplied 4-element uint32_t array.
  // NOTE(review): the subleaf register (ECX) is not initialized before the
  // cpuid; that is fine for the hypervisor leaves this stub queries, but
  // callers must not rely on a specific subleaf — TODO confirm.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
 716 
 717 
  // Generates a stub that reads the 48-byte processor brand string via the
  // extended CPUID leaves 0x80000002..0x80000004 into the proc_name_* slots
  // of the caller-supplied CpuidInfo. On pre-CPUID hardware (386/486) or when
  // the brand-string leaves are unsupported, the stub returns without storing
  // any brand-string data.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    // (ext_cpuid is only reached by fall-through; no branch targets it.)
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
 859 };
 860 
 861 void VM_Version::get_processor_features() {
 862 
 863   _cpu = 4; // 486 by default
 864   _model = 0;
 865   _stepping = 0;
 866   _features = 0;
 867   _logical_processors_per_package = 1;
 868   // i486 internal cache is both I&D and has a 16-byte line size
 869   _L1_data_cache_line_size = 16;
 870 
 871   // Get raw processor info
 872 
 873   get_cpu_info_stub(&_cpuid_info);
 874 
 875   assert_is_initialized();
 876   _cpu = extended_cpu_family();
 877   _model = extended_cpu_model();
 878   _stepping = cpu_stepping();
 879 
 880   if (cpu_family() > 4) { // it supports CPUID
 881     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 882     _cpu_features = _features;   // Preserve features
 883     // Logical processors are only available on P4s and above,
 884     // and only if hyperthreading is available.
 885     _logical_processors_per_package = logical_processor_count();
 886     _L1_data_cache_line_size = L1_line_size();
 887   }
 888 
 889   // xchg and xadd instructions
 890   _supports_atomic_getset4 = true;
 891   _supports_atomic_getadd4 = true;
 892   LP64_ONLY(_supports_atomic_getset8 = true);
 893   LP64_ONLY(_supports_atomic_getadd8 = true);
 894 
 895 #ifdef _LP64
 896   // OS should support SSE for x64 and hardware should support at least SSE2.
 897   if (!VM_Version::supports_sse2()) {
 898     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 899   }
 900   // in 64 bit the use of SSE2 is the minimum
 901   if (UseSSE < 2) UseSSE = 2;
 902 #endif
 903 
 904 #ifdef AMD64
 905   // flush_icache_stub have to be generated first.
 906   // That is why Icache line size is hard coded in ICache class,
 907   // see icache_x86.hpp. It is also the reason why we can't use
 908   // clflush instruction in 32-bit VM since it could be running
 909   // on CPU which does not support it.
 910   //
 911   // The only thing we can do is to verify that flushed
 912   // ICache::line_size has correct value.
 913   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 914   // clflush_size is size in quadwords (8 bytes).
 915   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 916 #endif
 917 
 918 #ifdef _LP64
 919   // assigning this field effectively enables Unsafe.writebackMemory()
 920   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 921   // that is only implemented on x86_64 and only if the OS plays ball
 922   if (os::supports_map_sync()) {
 923     // publish data cache line flush size to generic field, otherwise
 924     // let it default to zero, thereby disabling writeback
 925     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 926   }
 927 #endif
 928 
 929   // Check if processor has Intel Ecore
 930   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 931     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 932       _model == 0xCC || _model == 0xDD)) {
 933     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 934   }
 935 
 936   if (UseSSE < 4) {
 937     _features &= ~CPU_SSE4_1;
 938     _features &= ~CPU_SSE4_2;
 939   }
 940 
 941   if (UseSSE < 3) {
 942     _features &= ~CPU_SSE3;
 943     _features &= ~CPU_SSSE3;
 944     _features &= ~CPU_SSE4A;
 945   }
 946 
 947   if (UseSSE < 2)
 948     _features &= ~CPU_SSE2;
 949 
 950   if (UseSSE < 1)
 951     _features &= ~CPU_SSE;
 952 
 953   // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
 954   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 955     UseAVX = 0;
 956   }
 957 
 958   // UseSSE is set to the smaller of what hardware supports and what
 959   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 960   // older Pentiums which do not support it.
 961   int use_sse_limit = 0;
 962   if (UseSSE > 0) {
 963     if (UseSSE > 3 && supports_sse4_1()) {
 964       use_sse_limit = 4;
 965     } else if (UseSSE > 2 && supports_sse3()) {
 966       use_sse_limit = 3;
 967     } else if (UseSSE > 1 && supports_sse2()) {
 968       use_sse_limit = 2;
 969     } else if (UseSSE > 0 && supports_sse()) {
 970       use_sse_limit = 1;
 971     } else {
 972       use_sse_limit = 0;
 973     }
 974   }
 975   if (FLAG_IS_DEFAULT(UseSSE)) {
 976     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 977   } else if (UseSSE > use_sse_limit) {
 978     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 979     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 980   }
 981 
 982   // first try initial setting and detect what we can support
 983   int use_avx_limit = 0;
 984   if (UseAVX > 0) {
 985     if (UseSSE < 4) {
 986       // Don't use AVX if SSE is unavailable or has been disabled.
 987       use_avx_limit = 0;
 988     } else if (UseAVX > 2 && supports_evex()) {
 989       use_avx_limit = 3;
 990     } else if (UseAVX > 1 && supports_avx2()) {
 991       use_avx_limit = 2;
 992     } else if (UseAVX > 0 && supports_avx()) {
 993       use_avx_limit = 1;
 994     } else {
 995       use_avx_limit = 0;
 996     }
 997   }
 998   if (FLAG_IS_DEFAULT(UseAVX)) {
 999     // Don't use AVX-512 on older Skylakes unless explicitly requested.
1000     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1001       FLAG_SET_DEFAULT(UseAVX, 2);
1002     } else {
1003       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1004     }
1005   }
1006 
1007   if (UseAVX > use_avx_limit) {
1008     if (UseSSE < 4) {
1009       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1010     } else {
1011       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1012     }
1013     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1014   }
1015 
1016   if (UseAVX < 3) {
1017     _features &= ~CPU_AVX512F;
1018     _features &= ~CPU_AVX512DQ;
1019     _features &= ~CPU_AVX512CD;
1020     _features &= ~CPU_AVX512BW;
1021     _features &= ~CPU_AVX512VL;
1022     _features &= ~CPU_AVX512_VPOPCNTDQ;
1023     _features &= ~CPU_AVX512_VPCLMULQDQ;
1024     _features &= ~CPU_AVX512_VAES;
1025     _features &= ~CPU_AVX512_VNNI;
1026     _features &= ~CPU_AVX512_VBMI;
1027     _features &= ~CPU_AVX512_VBMI2;
1028     _features &= ~CPU_AVX512_BITALG;
1029     _features &= ~CPU_AVX512_IFMA;
1030     _features &= ~CPU_APX_F;
1031     _features &= ~CPU_AVX512_FP16;
1032   }
1033 
1034   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1035   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1036   if (UseAPX && !apx_supported) {
1037     warning("UseAPX is not supported on this CPU, setting it to false");
1038     FLAG_SET_DEFAULT(UseAPX, false);
1039   } else if (FLAG_IS_DEFAULT(UseAPX)) {
1040     FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
1041   }
1042 
1043   if (!UseAPX) {
1044     _features &= ~CPU_APX_F;
1045   }
1046 
1047   if (UseAVX < 2) {
1048     _features &= ~CPU_AVX2;
1049     _features &= ~CPU_AVX_IFMA;
1050   }
1051 
1052   if (UseAVX < 1) {
1053     _features &= ~CPU_AVX;
1054     _features &= ~CPU_VZEROUPPER;
1055     _features &= ~CPU_F16C;
1056     _features &= ~CPU_SHA512;
1057   }
1058 
1059   if (logical_processors_per_package() == 1) {
1060     // HT processor could be installed on a system which doesn't support HT.
1061     _features &= ~CPU_HT;
1062   }
1063 
1064   if (is_intel()) { // Intel cpus specific settings
1065     if (is_knights_family()) {
1066       _features &= ~CPU_VZEROUPPER;
1067       _features &= ~CPU_AVX512BW;
1068       _features &= ~CPU_AVX512VL;
1069       _features &= ~CPU_AVX512DQ;
1070       _features &= ~CPU_AVX512_VNNI;
1071       _features &= ~CPU_AVX512_VAES;
1072       _features &= ~CPU_AVX512_VPOPCNTDQ;
1073       _features &= ~CPU_AVX512_VPCLMULQDQ;
1074       _features &= ~CPU_AVX512_VBMI;
1075       _features &= ~CPU_AVX512_VBMI2;
1076       _features &= ~CPU_CLWB;
1077       _features &= ~CPU_FLUSHOPT;
1078       _features &= ~CPU_GFNI;
1079       _features &= ~CPU_AVX512_BITALG;
1080       _features &= ~CPU_AVX512_IFMA;
1081       _features &= ~CPU_AVX_IFMA;
1082       _features &= ~CPU_AVX512_FP16;
1083     }
1084   }
1085 
1086   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1087     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1088   } else {
1089     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1090   }
1091 
1092   assert(supports_cpuid(), "Always present");
1093   assert(supports_clflush(), "Always present");
1094   if (X86ICacheSync == -1) {
1095     // Auto-detect, choosing the best performant one that still flushes
1096     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1097     if (supports_clwb()) {
1098       FLAG_SET_ERGO(X86ICacheSync, 3);
1099     } else if (supports_clflushopt()) {
1100       FLAG_SET_ERGO(X86ICacheSync, 2);
1101     } else {
1102       FLAG_SET_ERGO(X86ICacheSync, 1);
1103     }
1104   } else {
1105     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1106       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1107     }
1108     if ((X86ICacheSync == 3) && !supports_clwb()) {
1109       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1110     }
1111     if ((X86ICacheSync == 5) && !supports_serialize()) {
1112       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1113     }
1114   }
1115 
1116   char buf[1024];
1117   int res = jio_snprintf(
1118               buf, sizeof(buf),
1119               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1120               cores_per_cpu(), threads_per_core(),
1121               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1122   assert(res > 0, "not enough temporary space allocated");
1123   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1124 
1125   _features_string = os::strdup(buf);
1126 
1127   // Use AES instructions if available.
1128   if (supports_aes()) {
1129     if (FLAG_IS_DEFAULT(UseAES)) {
1130       FLAG_SET_DEFAULT(UseAES, true);
1131     }
1132     if (!UseAES) {
1133       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1134         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1135       }
1136       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1137     } else {
1138       if (UseSSE > 2) {
1139         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1140           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1141         }
1142       } else {
1143         // The AES intrinsic stubs require AES instruction support (of course)
1144         // but also require sse3 mode or higher for instructions they use.
1145         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1146           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1147         }
1148         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1149       }
1150 
1151       // --AES-CTR begins--
1152       if (!UseAESIntrinsics) {
1153         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1154           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1155           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1156         }
1157       } else {
1158         if (supports_sse4_1()) {
1159           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1160             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1161           }
1162         } else {
1163            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1164            // but also require sse4.1 mode or higher for instructions they use.
1165           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1166              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1167            }
1168            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1169         }
1170       }
1171       // --AES-CTR ends--
1172     }
1173   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1174     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1175       warning("AES instructions are not available on this CPU");
1176       FLAG_SET_DEFAULT(UseAES, false);
1177     }
1178     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1179       warning("AES intrinsics are not available on this CPU");
1180       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1181     }
1182     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1183       warning("AES-CTR intrinsics are not available on this CPU");
1184       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1185     }
1186   }
1187 
1188   // Use CLMUL instructions if available.
1189   if (supports_clmul()) {
1190     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1191       UseCLMUL = true;
1192     }
1193   } else if (UseCLMUL) {
1194     if (!FLAG_IS_DEFAULT(UseCLMUL))
1195       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1196     FLAG_SET_DEFAULT(UseCLMUL, false);
1197   }
1198 
1199   if (UseCLMUL && (UseSSE > 2)) {
1200     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1201       UseCRC32Intrinsics = true;
1202     }
1203   } else if (UseCRC32Intrinsics) {
1204     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1205       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1206     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1207   }
1208 
1209 #ifdef _LP64
1210   if (supports_avx2()) {
1211     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1212       UseAdler32Intrinsics = true;
1213     }
1214   } else if (UseAdler32Intrinsics) {
1215     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1216       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1217     }
1218     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1219   }
1220 #else
1221   if (UseAdler32Intrinsics) {
1222     warning("Adler32Intrinsics not available on this CPU.");
1223     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1224   }
1225 #endif
1226 
1227   if (supports_sse4_2() && supports_clmul()) {
1228     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1229       UseCRC32CIntrinsics = true;
1230     }
1231   } else if (UseCRC32CIntrinsics) {
1232     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1233       warning("CRC32C intrinsics are not available on this CPU");
1234     }
1235     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1236   }
1237 
1238   // GHASH/GCM intrinsics
1239   if (UseCLMUL && (UseSSE > 2)) {
1240     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1241       UseGHASHIntrinsics = true;
1242     }
1243   } else if (UseGHASHIntrinsics) {
1244     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1245       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1246     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1247   }
1248 
1249 #ifdef _LP64
1250   // ChaCha20 Intrinsics
1251   // As long as the system supports AVX as a baseline we can do a
1252   // SIMD-enabled block function.  StubGenerator makes the determination
1253   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1254   // version.
1255   if (UseAVX >= 1) {
1256       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1257           UseChaCha20Intrinsics = true;
1258       }
1259   } else if (UseChaCha20Intrinsics) {
1260       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1261           warning("ChaCha20 intrinsic requires AVX instructions");
1262       }
1263       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1264   }
1265 #else
1266   // No support currently for ChaCha20 intrinsics on 32-bit platforms
1267   if (UseChaCha20Intrinsics) {
1268       warning("ChaCha20 intrinsics are not available on this CPU.");
1269       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1270   }
1271 #endif // _LP64
1272 
1273   // Dilithium Intrinsics
1274   // Currently we only have them for AVX512
1275 #ifdef _LP64
1276   if (supports_evex() && supports_avx512bw()) {
1277       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1278           UseDilithiumIntrinsics = true;
1279       }
1280   } else
1281 #endif
1282    if (UseDilithiumIntrinsics) {
1283       warning("Intrinsics for ML-DSA are not available on this CPU.");
1284       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1285   }
1286 
1287   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1288   if (UseAVX >= 2) {
1289     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1290       UseBASE64Intrinsics = true;
1291     }
1292   } else if (UseBASE64Intrinsics) {
1293      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1294       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1295     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1296   }
1297 
1298   if (supports_fma()) {
1299     if (FLAG_IS_DEFAULT(UseFMA)) {
1300       UseFMA = true;
1301     }
1302   } else if (UseFMA) {
1303     warning("FMA instructions are not available on this CPU");
1304     FLAG_SET_DEFAULT(UseFMA, false);
1305   }
1306 
1307   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1308     UseMD5Intrinsics = true;
1309   }
1310 
1311   if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
1312     if (FLAG_IS_DEFAULT(UseSHA)) {
1313       UseSHA = true;
1314     }
1315   } else if (UseSHA) {
1316     warning("SHA instructions are not available on this CPU");
1317     FLAG_SET_DEFAULT(UseSHA, false);
1318   }
1319 
1320   if (supports_sha() && supports_sse4_1() && UseSHA) {
1321     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1322       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1323     }
1324   } else if (UseSHA1Intrinsics) {
1325     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1326     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1327   }
1328 
1329   if (supports_sse4_1() && UseSHA) {
1330     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1331       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1332     }
1333   } else if (UseSHA256Intrinsics) {
1334     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1335     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1336   }
1337 
1338 #ifdef _LP64
1339   // These are only supported on 64-bit
1340   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1341     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1342       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1343     }
1344   } else
1345 #endif
1346   if (UseSHA512Intrinsics) {
1347     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1348     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1349   }
1350 
1351 #ifdef _LP64
1352   if (supports_evex() && supports_avx512bw()) {
1353       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1354           UseSHA3Intrinsics = true;
1355       }
1356   } else
1357 #endif
1358    if (UseSHA3Intrinsics) {
1359       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1360       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1361   }
1362 
1363   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1364     FLAG_SET_DEFAULT(UseSHA, false);
1365   }
1366 
1367 #if COMPILER2_OR_JVMCI
1368   int max_vector_size = 0;
1369   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1370     // 16 byte vectors (in XMM) are supported with SSE2+
1371     max_vector_size = 16;
1372   } else if (UseAVX == 1 || UseAVX == 2) {
1373     // 32 bytes vectors (in YMM) are only supported with AVX+
1374     max_vector_size = 32;
1375   } else if (UseAVX > 2) {
1376     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1377     max_vector_size = 64;
1378   }
1379 
1380 #ifdef _LP64
1381   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1382 #else
1383   int min_vector_size = 0;
1384 #endif
1385 
1386   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1387     if (MaxVectorSize < min_vector_size) {
1388       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1389       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1390     }
1391     if (MaxVectorSize > max_vector_size) {
1392       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1393       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1394     }
1395     if (!is_power_of_2(MaxVectorSize)) {
1396       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1397       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1398     }
1399   } else {
1400     // If default, use highest supported configuration
1401     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1402   }
1403 
1404 #if defined(COMPILER2) && defined(ASSERT)
1405   if (MaxVectorSize > 0) {
1406     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1407       tty->print_cr("State of YMM registers after signal handle:");
1408       int nreg = 2 LP64_ONLY(+2);
1409       const char* ymm_name[4] = {"0", "7", "8", "15"};
1410       for (int i = 0; i < nreg; i++) {
1411         tty->print("YMM%s:", ymm_name[i]);
1412         for (int j = 7; j >=0; j--) {
1413           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1414         }
1415         tty->cr();
1416       }
1417     }
1418   }
1419 #endif // COMPILER2 && ASSERT
1420 
1421 #ifdef _LP64
1422   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1423     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1424       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1425     }
1426   } else
1427 #endif
1428   if (UsePoly1305Intrinsics) {
1429     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1430     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1431   }
1432 
1433 #ifdef _LP64
1434   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1435     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1436       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1437     }
1438   } else
1439 #endif
1440   if (UseIntPolyIntrinsics) {
1441     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1442     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1443   }
1444 
1445 #ifdef _LP64
1446   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1447     UseMultiplyToLenIntrinsic = true;
1448   }
1449   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1450     UseSquareToLenIntrinsic = true;
1451   }
1452   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1453     UseMulAddIntrinsic = true;
1454   }
1455   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1456     UseMontgomeryMultiplyIntrinsic = true;
1457   }
1458   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1459     UseMontgomerySquareIntrinsic = true;
1460   }
1461 #else
1462   if (UseMultiplyToLenIntrinsic) {
1463     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1464       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1465     }
1466     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1467   }
1468   if (UseMontgomeryMultiplyIntrinsic) {
1469     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1470       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1471     }
1472     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1473   }
1474   if (UseMontgomerySquareIntrinsic) {
1475     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1476       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1477     }
1478     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1479   }
1480   if (UseSquareToLenIntrinsic) {
1481     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1482       warning("squareToLen intrinsic is not available in 32-bit VM");
1483     }
1484     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1485   }
1486   if (UseMulAddIntrinsic) {
1487     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1488       warning("mulAdd intrinsic is not available in 32-bit VM");
1489     }
1490     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1491   }
1492 #endif // _LP64
1493 #endif // COMPILER2_OR_JVMCI
1494 
1495   // On new cpus instructions which update whole XMM register should be used
1496   // to prevent partial register stall due to dependencies on high half.
1497   //
1498   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1499   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1500   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1501   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1502 
1503 
1504   if (is_zx()) { // ZX cpus specific settings
1505     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1506       UseStoreImmI16 = false; // don't use it on ZX cpus
1507     }
1508     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1509       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1510         // Use it on all ZX cpus
1511         UseAddressNop = true;
1512       }
1513     }
1514     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1515       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1516     }
1517     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1518       if (supports_sse3()) {
1519         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1520       } else {
1521         UseXmmRegToRegMoveAll = false;
1522       }
1523     }
1524     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1525 #ifdef COMPILER2
1526       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1527         // For new ZX cpus do the next optimization:
1528         // don't align the beginning of a loop if there are enough instructions
1529         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1530         // in current fetch line (OptoLoopAlignment) or the padding
1531         // is big (> MaxLoopPad).
1532         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1533         // generated NOP instructions. 11 is the largest size of one
1534         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1535         MaxLoopPad = 11;
1536       }
1537 #endif // COMPILER2
1538       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1539         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1540       }
1541       if (supports_sse4_2()) { // new ZX cpus
1542         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1543           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1544         }
1545       }
1546       if (supports_sse4_2()) {
1547         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1548           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1549         }
1550       } else {
1551         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1552           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1553         }
1554         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1555       }
1556     }
1557 
1558     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1559       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1560     }
1561   }
1562 
1563   if (is_amd_family()) { // AMD cpus specific settings
1564     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1565       // Use it on new AMD cpus starting from Opteron.
1566       UseAddressNop = true;
1567     }
1568     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1569       // Use it on new AMD cpus starting from Opteron.
1570       UseNewLongLShift = true;
1571     }
1572     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1573       if (supports_sse4a()) {
1574         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1575       } else {
1576         UseXmmLoadAndClearUpper = false;
1577       }
1578     }
1579     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1580       if (supports_sse4a()) {
1581         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1582       } else {
1583         UseXmmRegToRegMoveAll = false;
1584       }
1585     }
1586     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1587       if (supports_sse4a()) {
1588         UseXmmI2F = true;
1589       } else {
1590         UseXmmI2F = false;
1591       }
1592     }
1593     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1594       if (supports_sse4a()) {
1595         UseXmmI2D = true;
1596       } else {
1597         UseXmmI2D = false;
1598       }
1599     }
1600     if (supports_sse4_2()) {
1601       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1602         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1603       }
1604     } else {
1605       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1606         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1607       }
1608       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1609     }
1610 
1611     // some defaults for AMD family 15h
1612     if (cpu_family() == 0x15) {
1613       // On family 15h processors default is no sw prefetch
1614       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1615         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1616       }
1617       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1618       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1619         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1620       }
1621       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1622       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1623         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1624       }
1625       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1626         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1627       }
1628     }
1629 
1630 #ifdef COMPILER2
1631     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1632       // Limit vector size to 16 bytes on AMD cpus < 17h.
1633       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1634     }
1635 #endif // COMPILER2
1636 
1637     // Some defaults for AMD family >= 17h && Hygon family 18h
1638     if (cpu_family() >= 0x17) {
1639       // On family >=17h processors use XMM and UnalignedLoadStores
1640       // for Array Copy
1641       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1642         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1643       }
1644       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1645         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1646       }
1647 #ifdef COMPILER2
1648       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1649         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1650       }
1651 #endif
1652     }
1653   }
1654 
1655   if (is_intel()) { // Intel cpus specific settings
1656     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1657       UseStoreImmI16 = false; // don't use it on Intel cpus
1658     }
1659     if (cpu_family() == 6 || cpu_family() == 15) {
1660       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1661         // Use it on all Intel cpus starting from PentiumPro
1662         UseAddressNop = true;
1663       }
1664     }
1665     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1666       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1667     }
1668     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1669       if (supports_sse3()) {
1670         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1671       } else {
1672         UseXmmRegToRegMoveAll = false;
1673       }
1674     }
1675     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1676 #ifdef COMPILER2
1677       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1678         // For new Intel cpus do the next optimization:
1679         // don't align the beginning of a loop if there are enough instructions
1680         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1681         // in current fetch line (OptoLoopAlignment) or the padding
1682         // is big (> MaxLoopPad).
1683         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1684         // generated NOP instructions. 11 is the largest size of one
1685         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1686         MaxLoopPad = 11;
1687       }
1688 #endif // COMPILER2
1689 
1690       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1691         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1692       }
1693       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1694         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1695           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1696         }
1697       }
1698       if (supports_sse4_2()) {
1699         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1700           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1701         }
1702       } else {
1703         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1704           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1705         }
1706         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1707       }
1708     }
1709     if (is_atom_family() || is_knights_family()) {
1710 #ifdef COMPILER2
1711       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1712         OptoScheduling = true;
1713       }
1714 #endif
1715       if (supports_sse4_2()) { // Silvermont
1716         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1717           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1718         }
1719       }
1720       if (FLAG_IS_DEFAULT(UseIncDec)) {
1721         FLAG_SET_DEFAULT(UseIncDec, false);
1722       }
1723     }
1724     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1725       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1726     }
1727 #ifdef COMPILER2
1728     if (UseAVX > 2) {
1729       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1730           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1731            ArrayOperationPartialInlineSize != 0 &&
1732            ArrayOperationPartialInlineSize != 16 &&
1733            ArrayOperationPartialInlineSize != 32 &&
1734            ArrayOperationPartialInlineSize != 64)) {
1735         int inline_size = 0;
1736         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1737           inline_size = 64;
1738         } else if (MaxVectorSize >= 32) {
1739           inline_size = 32;
1740         } else if (MaxVectorSize >= 16) {
1741           inline_size = 16;
1742         }
1743         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1744           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1745         }
1746         ArrayOperationPartialInlineSize = inline_size;
1747       }
1748 
1749       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1750         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1751         if (ArrayOperationPartialInlineSize) {
1752           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1753         } else {
1754           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1755         }
1756       }
1757     }
1758 #endif
1759   }
1760 
1761 #ifdef COMPILER2
1762   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1763     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1764       OptimizeFill = false;
1765     }
1766   }
1767 #endif
1768 
1769 #ifdef _LP64
1770   if (UseSSE42Intrinsics) {
1771     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1772       UseVectorizedMismatchIntrinsic = true;
1773     }
1774   } else if (UseVectorizedMismatchIntrinsic) {
1775     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1776       warning("vectorizedMismatch intrinsics are not available on this CPU");
1777     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1778   }
1779   if (UseAVX >= 2) {
1780     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1781   } else if (UseVectorizedHashCodeIntrinsic) {
1782     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1783       warning("vectorizedHashCode intrinsics are not available on this CPU");
1784     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1785   }
1786 #else
1787   if (UseVectorizedMismatchIntrinsic) {
1788     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1789       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1790     }
1791     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1792   }
1793   if (UseVectorizedHashCodeIntrinsic) {
1794     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1795       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1796     }
1797     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1798   }
1799 #endif // _LP64
1800 
1801   // Use count leading zeros count instruction if available.
1802   if (supports_lzcnt()) {
1803     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1804       UseCountLeadingZerosInstruction = true;
1805     }
1806    } else if (UseCountLeadingZerosInstruction) {
1807     warning("lzcnt instruction is not available on this CPU");
1808     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1809   }
1810 
1811   // Use count trailing zeros instruction if available
1812   if (supports_bmi1()) {
1813     // tzcnt does not require VEX prefix
1814     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1815       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1816         // Don't use tzcnt if BMI1 is switched off on command line.
1817         UseCountTrailingZerosInstruction = false;
1818       } else {
1819         UseCountTrailingZerosInstruction = true;
1820       }
1821     }
1822   } else if (UseCountTrailingZerosInstruction) {
1823     warning("tzcnt instruction is not available on this CPU");
1824     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1825   }
1826 
1827   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1828   // VEX prefix is generated only when AVX > 0.
1829   if (supports_bmi1() && supports_avx()) {
1830     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1831       UseBMI1Instructions = true;
1832     }
1833   } else if (UseBMI1Instructions) {
1834     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1835     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1836   }
1837 
1838   if (supports_bmi2() && supports_avx()) {
1839     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1840       UseBMI2Instructions = true;
1841     }
1842   } else if (UseBMI2Instructions) {
1843     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1844     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1845   }
1846 
1847   // Use population count instruction if available.
1848   if (supports_popcnt()) {
1849     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1850       UsePopCountInstruction = true;
1851     }
1852   } else if (UsePopCountInstruction) {
1853     warning("POPCNT instruction is not available on this CPU");
1854     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1855   }
1856 
1857   // Use fast-string operations if available.
1858   if (supports_erms()) {
1859     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1860       UseFastStosb = true;
1861     }
1862   } else if (UseFastStosb) {
1863     warning("fast-string operations are not available on this CPU");
1864     FLAG_SET_DEFAULT(UseFastStosb, false);
1865   }
1866 
1867   // For AMD Processors use XMM/YMM MOVDQU instructions
1868   // for Object Initialization as default
1869   if (is_amd() && cpu_family() >= 0x19) {
1870     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1871       UseFastStosb = false;
1872     }
1873   }
1874 
1875 #ifdef COMPILER2
1876   if (is_intel() && MaxVectorSize > 16) {
1877     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1878       UseFastStosb = false;
1879     }
1880   }
1881 #endif
1882 
1883   // Use XMM/YMM MOVDQU instruction for Object Initialization
1884   if (UseUnalignedLoadStores) {
1885     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1886       UseXMMForObjInit = true;
1887     }
1888   } else if (UseXMMForObjInit) {
1889     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1890     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1891   }
1892 
1893 #ifdef COMPILER2
1894   if (FLAG_IS_DEFAULT(AlignVector)) {
1895     // Modern processors allow misaligned memory operations for vectors.
1896     AlignVector = !UseUnalignedLoadStores;
1897   }
1898 #endif // COMPILER2
1899 
1900   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1901     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1902       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1903     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1904       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1905     }
1906   }
1907 
1908   // Allocation prefetch settings
1909   int cache_line_size = checked_cast<int>(prefetch_data_size());
1910   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1911       (cache_line_size > AllocatePrefetchStepSize)) {
1912     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1913   }
1914 
1915   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1916     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1917     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1918       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1919     }
1920     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1921   }
1922 
1923   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1924     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1925     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1926   }
1927 
1928   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1929     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1930         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1931       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1932     }
1933 #ifdef COMPILER2
1934     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1935       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1936     }
1937 #endif
1938   }
1939 
1940   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1941 #ifdef COMPILER2
1942     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1943       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1944     }
1945 #endif
1946   }
1947 
1948 #ifdef _LP64
1949   // Prefetch settings
1950 
1951   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1952   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1953   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1954   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1955 
1956   // gc copy/scan is disabled if prefetchw isn't supported, because
1957   // Prefetch::write emits an inlined prefetchw on Linux.
1958   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1959   // The used prefetcht0 instruction works for both amd64 and em64t.
1960 
1961   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1962     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1963   }
1964   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1965     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1966   }
1967 #endif
1968 
1969   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1970      (cache_line_size > ContendedPaddingWidth))
1971      ContendedPaddingWidth = cache_line_size;
1972 
1973   // This machine allows unaligned memory accesses
1974   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1975     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1976   }
1977 
1978 #ifndef PRODUCT
1979   if (log_is_enabled(Info, os, cpu)) {
1980     LogStream ls(Log(os, cpu)::info());
1981     outputStream* log = &ls;
1982     log->print_cr("Logical CPUs per core: %u",
1983                   logical_processors_per_package());
1984     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1985     log->print("UseSSE=%d", UseSSE);
1986     if (UseAVX > 0) {
1987       log->print("  UseAVX=%d", UseAVX);
1988     }
1989     if (UseAES) {
1990       log->print("  UseAES=1");
1991     }
1992 #ifdef COMPILER2
1993     if (MaxVectorSize > 0) {
1994       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1995     }
1996 #endif
1997     log->cr();
1998     log->print("Allocation");
1999     if (AllocatePrefetchStyle <= 0) {
2000       log->print_cr(": no prefetching");
2001     } else {
2002       log->print(" prefetching: ");
2003       if (AllocatePrefetchInstr == 0) {
2004         log->print("PREFETCHNTA");
2005       } else if (AllocatePrefetchInstr == 1) {
2006         log->print("PREFETCHT0");
2007       } else if (AllocatePrefetchInstr == 2) {
2008         log->print("PREFETCHT2");
2009       } else if (AllocatePrefetchInstr == 3) {
2010         log->print("PREFETCHW");
2011       }
2012       if (AllocatePrefetchLines > 1) {
2013         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
2014       } else {
2015         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
2016       }
2017     }
2018 
2019     if (PrefetchCopyIntervalInBytes > 0) {
2020       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
2021     }
2022     if (PrefetchScanIntervalInBytes > 0) {
2023       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
2024     }
2025     if (ContendedPaddingWidth > 0) {
2026       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
2027     }
2028   }
2029 #endif // !PRODUCT
2030   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
2031       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
2032   }
2033   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
2034       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
2035   }
2036 }
2037 
2038 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2039   VirtualizationType vrt = VM_Version::get_detected_virtualization();
2040   if (vrt == XenHVM) {
2041     st->print_cr("Xen hardware-assisted virtualization detected");
2042   } else if (vrt == KVM) {
2043     st->print_cr("KVM virtualization detected");
2044   } else if (vrt == VMWare) {
2045     st->print_cr("VMWare virtualization detected");
2046     VirtualizationSupport::print_virtualization_info(st);
2047   } else if (vrt == HyperV) {
2048     st->print_cr("Hyper-V virtualization detected");
2049   } else if (vrt == HyperVRole) {
2050     st->print_cr("Hyper-V role detected");
2051   }
2052 }
2053 
// Returns true if the current CPU (identified by _model/_stepping) is on
// Intel's list of parts affected by the Jump Conditional Code erratum.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  // Each case below returns true exactly for the steppings of that model
  // which the document lists as affected.
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2121 
2122 // On Xen, the cpuid instruction returns
2123 //  eax / registers[0]: Version of Xen
2124 //  ebx / registers[1]: chars 'XenV'
2125 //  ecx / registers[2]: chars 'MMXe'
2126 //  edx / registers[3]: chars 'nVMM'
2127 //
2128 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2129 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2130 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2131 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2132 //
// More information:
2134 // https://kb.vmware.com/s/article/1009458
2135 //
2136 void VM_Version::check_virtualizations() {
2137   uint32_t registers[4] = {0};
2138   char signature[13] = {0};
2139 
2140   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2141   // from 0x40000000 until 0x40010000.
2142   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2143   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2144     detect_virt_stub(leaf, registers);
2145     memcpy(signature, &registers[1], 12);
2146 
2147     if (strncmp("VMwareVMware", signature, 12) == 0) {
2148       Abstract_VM_Version::_detected_virtualization = VMWare;
2149       // check for extended metrics from guestlib
2150       VirtualizationSupport::initialize();
2151     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2152       Abstract_VM_Version::_detected_virtualization = HyperV;
2153 #ifdef _WINDOWS
2154       // CPUID leaf 0x40000007 is available to the root partition only.
2155       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2156       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2157       detect_virt_stub(0x40000007, registers);
2158       if ((registers[0] != 0x0) ||
2159           (registers[1] != 0x0) ||
2160           (registers[2] != 0x0) ||
2161           (registers[3] != 0x0)) {
2162         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2163       }
2164 #endif
2165     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2166       Abstract_VM_Version::_detected_virtualization = KVM;
2167     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2168       Abstract_VM_Version::_detected_virtualization = XenHVM;
2169     }
2170   }
2171 }
2172 
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options, i.e.
// neither UseAVX nor MaxVectorSize was set on the command line.
bool VM_Version::is_default_intel_cascade_lake() {
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  return UseAVX > 2 && is_intel_cascade_lake();
}
#endif
2182 
2183 bool VM_Version::is_intel_cascade_lake() {
2184   return is_intel_skylake() && _stepping >= 5;
2185 }
2186 
2187 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2188 // for implementing the array copy and clear operations.
2189 // The Intel platforms that supports the serialize instruction
2190 // has improved implementation of 64-byte load/stores and so the default
2191 // threshold is set to 0 for these platforms.
2192 int VM_Version::avx3_threshold() {
2193   return (is_intel_family_core() &&
2194           supports_serialize() &&
2195           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2196 }
2197 
#if defined(_LP64)
// Thin wrapper around the generated clear-APX-test-state stub
// (installed in VM_Version::initialize()).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
#endif
2203 
// Set to true once VM_Version::initialize() has completed.
static bool _vm_version_initialized = false;
2205 
// Generate the cpuid stubs, query the processor features, and record any
// detected hypervisor.  Runs once during VM startup.
void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Stub that executes cpuid and records the CPU's feature information.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  // Stub that executes cpuid on an arbitrary leaf, used by
  // check_virtualizations() to probe the hypervisor leaves.
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());

#if defined(_LP64)
  // Stub backing VM_Version::clear_apx_test_state().
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
#endif
  // Must run after the stubs above exist; uses get_cpu_info_stub.
  get_processor_features();

  LP64_ONLY(Assembler::precompute_instructions();)

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2234 
// x86 CPU family ids as reported by cpuid.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
2244 
// Feature bits from the extended-feature EDX word (named in
// _feature_extended_edx_id below).
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2249 
// Feature bits from the standard-feature EDX word, one bit per feature
// (human-readable names in _feature_edx_id below).
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2280 
// Blob and size for a generated stub that retrieves the CPU brand string.
static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  // Signature of the generated brand-string stub; the void* argument is
  // presumably the destination buffer — confirm against the stub generator.
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

// Entry point of the generated stub; nullptr until the stub is created.
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2289 
// VM_Version statics

// Sizes of the family-name lookup tables below.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;  // 12-character cpuid vendor id plus NUL
// Extended brand string buffer: (3 * 4 * 4) bytes of cpuid data plus NUL.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
// Cached maximum qualified CPU frequency; 0 until determined.
static int64_t _max_qualified_cpu_frequency = 0;

// Cached thread and core counts; 0 until determined.
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2303 
// Intel family names, indexed by cpu family id (see FamilyFlag above).
// Empty strings mark family ids with no assigned name.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2322 
// AMD family names, indexed by cpu family id.
// Empty strings mark family ids with no assigned name.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Model names for family 6 (Pentium Pro and successors), indexed by model
// id; the array is nullptr-terminated and empty strings mark unnamed models.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2425 
/* Brand ID is for backward compatibility with older CPUs;
 * newer CPUs use the extended brand string instead.
 * Indexed by the brand id in CPUID leaf 1 EBX[7:0] (see cpu_brand());
 * nullptr-terminated so lookups can stop at the end of the table. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2440 
2441 
// Printable names for CPUID leaf 1 EDX feature bits, indexed by bit number.
// Empty strings mark bits that are not reported; entries are consumed by
// cpu_write_support_string().
const char* const _feature_edx_id[] = {
  "On-Chip FPU",                        // bit 0
  "Virtual Mode Extensions",            // bit 1
  "Debugging Extensions",               // bit 2
  "Page Size Extensions",               // bit 3
  "Time Stamp Counter",                 // bit 4
  "Model Specific Registers",           // bit 5
  "Physical Address Extension",         // bit 6
  "Machine Check Exceptions",           // bit 7
  "CMPXCHG8B Instruction",              // bit 8
  "On-Chip APIC",                       // bit 9
  "",                                   // bit 10 (reserved)
  "Fast System Call",                   // bit 11 (SEP)
  "Memory Type Range Registers",        // bit 12
  "Page Global Enable",                 // bit 13
  "Machine Check Architecture",         // bit 14
  "Conditional Mov Instruction",        // bit 15
  "Page Attribute Table",               // bit 16
  "36-bit Page Size Extension",         // bit 17
  "Processor Serial Number",            // bit 18
  "CLFLUSH Instruction",                // bit 19
  "",                                   // bit 20 (reserved)
  "Debug Trace Store feature",          // bit 21
  "ACPI registers in MSR space",        // bit 22
  "Intel Architecture MMX Technology",  // bit 23
  "Fast Float Point Save and Restore",  // bit 24 (FXSR)
  "Streaming SIMD extensions",          // bit 25
  "Streaming SIMD extensions 2",        // bit 26
  "Self-Snoop",                         // bit 27
  "Hyper Threading",                    // bit 28 (HTT)
  "Thermal Monitor",                    // bit 29
  "",                                   // bit 30 (reserved)
  "Pending Break Enable"                // bit 31
};
2476 
// Printable names for CPUID leaf 0x80000001 EDX feature bits, indexed by
// bit number. Empty strings mark bits that are not reported; entries are
// consumed by cpu_write_support_string().
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",         // bit 11
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",    // bit 20 (NX)
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",                 // bit 27
  "",
  "Intel 64 Architecture",  // bit 29 (long mode)
  "",
  ""
};
2511 
// Printable names for CPUID leaf 1 ECX feature bits, indexed by bit number.
// Empty strings mark bits that are not reported; entries are consumed by
// cpu_write_support_string().
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",                // bit 0
  "PCLMULQDQ",                                  // bit 1
  "64-bit DS Area",                             // bit 2
  "MONITOR/MWAIT instructions",                 // bit 3
  "CPL Qualified Debug Store",                  // bit 4
  "Virtual Machine Extensions",                 // bit 5
  "Safer Mode Extensions",                      // bit 6
  "Enhanced Intel SpeedStep technology",        // bit 7
  "Thermal Monitor 2",                          // bit 8
  "Supplemental Streaming SIMD Extensions 3",   // bit 9
  "L1 Context ID",                              // bit 10
  "",                                           // bit 11 (reserved)
  "Fused Multiply-Add",                         // bit 12
  "CMPXCHG16B",                                 // bit 13
  "xTPR Update Control",                        // bit 14
  "Perfmon and Debug Capability",               // bit 15
  "",                                           // bit 16 (reserved)
  "Process-context identifiers",                // bit 17
  "Direct Cache Access",                        // bit 18
  "Streaming SIMD extensions 4.1",              // bit 19
  "Streaming SIMD extensions 4.2",              // bit 20
  "x2APIC",                                     // bit 21
  "MOVBE",                                      // bit 22
  "Popcount instruction",                       // bit 23
  "TSC-Deadline",                               // bit 24
  "AESNI",                                      // bit 25
  "XSAVE",                                      // bit 26
  "OSXSAVE",                                    // bit 27
  "AVX",                                        // bit 28
  "F16C",                                       // bit 29
  "RDRAND",                                     // bit 30
  ""                                            // bit 31
};
2546 
// Printable names for CPUID leaf 0x80000001 ECX feature bits, indexed by
// bit number. Empty strings mark bits that are not reported; entries are
// consumed by cpu_write_support_string().
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",              // bit 0
  "Core multi-processor legacy mode",           // bit 1
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",          // bit 5 (ABM)
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",    // bit 6
  "Misaligned SSE mode",                        // bit 7
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2581 
2582 void VM_Version::initialize_tsc(void) {
2583   ResourceMark rm;
2584 
2585   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2586   if (cpuid_brand_string_stub_blob == nullptr) {
2587     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2588   }
2589   CodeBuffer c(cpuid_brand_string_stub_blob);
2590   VM_Version_StubGenerator g(&c);
2591   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2592                                    g.generate_getCPUIDBrandString());
2593 }
2594 
2595 const char* VM_Version::cpu_model_description(void) {
2596   uint32_t cpu_family = extended_cpu_family();
2597   uint32_t cpu_model = extended_cpu_model();
2598   const char* model = nullptr;
2599 
2600   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2601     for (uint32_t i = 0; i <= cpu_model; i++) {
2602       model = _model_id_pentium_pro[i];
2603       if (model == nullptr) {
2604         break;
2605       }
2606     }
2607   }
2608   return model;
2609 }
2610 
2611 const char* VM_Version::cpu_brand_string(void) {
2612   if (_cpu_brand_string == nullptr) {
2613     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2614     if (nullptr == _cpu_brand_string) {
2615       return nullptr;
2616     }
2617     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2618     if (ret_val != OS_OK) {
2619       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2620       _cpu_brand_string = nullptr;
2621     }
2622   }
2623   return _cpu_brand_string;
2624 }
2625 
2626 const char* VM_Version::cpu_brand(void) {
2627   const char*  brand  = nullptr;
2628 
2629   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2630     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2631     brand = _brand_id[0];
2632     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2633       brand = _brand_id[i];
2634     }
2635   }
2636   return brand;
2637 }
2638 
2639 bool VM_Version::cpu_is_em64t(void) {
2640   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2641 }
2642 
2643 bool VM_Version::is_netburst(void) {
2644   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2645 }
2646 
2647 bool VM_Version::supports_tscinv_ext(void) {
2648   if (!supports_tscinv_bit()) {
2649     return false;
2650   }
2651 
2652   if (is_intel()) {
2653     return true;
2654   }
2655 
2656   if (is_amd()) {
2657     return !is_amd_Barcelona();
2658   }
2659 
2660   if (is_hygon()) {
2661     return true;
2662   }
2663 
2664   return false;
2665 }
2666 
2667 void VM_Version::resolve_cpu_information_details(void) {
2668 
2669   // in future we want to base this information on proper cpu
2670   // and cache topology enumeration such as:
2671   // Intel 64 Architecture Processor Topology Enumeration
2672   // which supports system cpu and cache topology enumeration
2673   // either using 2xAPICIDs or initial APICIDs
2674 
2675   // currently only rough cpu information estimates
2676   // which will not necessarily reflect the exact configuration of the system
2677 
2678   // this is the number of logical hardware threads
2679   // visible to the operating system
2680   _no_of_threads = os::processor_count();
2681 
2682   // find out number of threads per cpu package
2683   int threads_per_package = threads_per_core() * cores_per_cpu();
2684 
2685   // use amount of threads visible to the process in order to guess number of sockets
2686   _no_of_sockets = _no_of_threads / threads_per_package;
2687 
2688   // process might only see a subset of the total number of threads
2689   // from a single processor package. Virtualization/resource management for example.
2690   // If so then just write a hard 1 as num of pkgs.
2691   if (0 == _no_of_sockets) {
2692     _no_of_sockets = 1;
2693   }
2694 
2695   // estimate the number of cores
2696   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2697 }
2698 
2699 
2700 const char* VM_Version::cpu_family_description(void) {
2701   int cpu_family_id = extended_cpu_family();
2702   if (is_amd()) {
2703     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2704       return _family_id_amd[cpu_family_id];
2705     }
2706   }
2707   if (is_intel()) {
2708     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2709       return cpu_model_description();
2710     }
2711     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2712       return _family_id_intel[cpu_family_id];
2713     }
2714   }
2715   if (is_hygon()) {
2716     return "Dhyana";
2717   }
2718   return "Unknown x86";
2719 }
2720 
2721 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2722   assert(buf != nullptr, "buffer is null!");
2723   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2724 
2725   const char* cpu_type = nullptr;
2726   const char* x64 = nullptr;
2727 
2728   if (is_intel()) {
2729     cpu_type = "Intel";
2730     x64 = cpu_is_em64t() ? " Intel64" : "";
2731   } else if (is_amd()) {
2732     cpu_type = "AMD";
2733     x64 = cpu_is_em64t() ? " AMD64" : "";
2734   } else if (is_hygon()) {
2735     cpu_type = "Hygon";
2736     x64 = cpu_is_em64t() ? " AMD64" : "";
2737   } else {
2738     cpu_type = "Unknown x86";
2739     x64 = cpu_is_em64t() ? " x86_64" : "";
2740   }
2741 
2742   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2743     cpu_type,
2744     cpu_family_description(),
2745     supports_ht() ? " (HT)" : "",
2746     supports_sse3() ? " SSE3" : "",
2747     supports_ssse3() ? " SSSE3" : "",
2748     supports_sse4_1() ? " SSE4.1" : "",
2749     supports_sse4_2() ? " SSE4.2" : "",
2750     supports_sse4a() ? " SSE4A" : "",
2751     is_netburst() ? " Netburst" : "",
2752     is_intel_family_core() ? " Core" : "",
2753     x64);
2754 
2755   return OS_OK;
2756 }
2757 
2758 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2759   assert(buf != nullptr, "buffer is null!");
2760   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2761   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2762 
2763   // invoke newly generated asm code to fetch CPU Brand String
2764   getCPUIDBrandString_stub(&_cpuid_info);
2765 
2766   // fetch results into buffer
2767   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2768   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2769   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2770   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2771   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2772   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2773   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2774   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2775   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2776   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2777   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2778   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2779 
2780   return OS_OK;
2781 }
2782 
// Append a comma-separated list of the feature names whose CPUID bits are
// set into 'buf'. Returns the number of characters written (excluding the
// terminating NUL), or buf_len - 1 if the buffer was exhausted.
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";  // becomes ", " once the first name is written

// Append 'string' (preceded by the current separator) to buf; on a
// jio_snprintf error/truncation, report the buffer as full and return.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // CPUID leaf 1 EDX feature bits. NOTE(review): the loop bound 0x20000000
  // only scans bits 0..29, so names at bits 30-31 (e.g. "Pending Break
  // Enable") are never reported — presumably intentional; confirm upstream.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID leaf 1 ECX feature bits.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // CPUID leaf 0x80000001 ECX feature bits.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // CPUID leaf 0x80000001 EDX feature bits.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Invariant TSC has its own dedicated query rather than a table entry.
  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}
2839 
2840 /**
2841  * Write a detailed description of the cpu to a given buffer, including
2842  * feature set.
2843  */
2844 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2845   assert(buf != nullptr, "buffer is null!");
2846   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2847 
2848   static const char* unknown = "<unknown>";
2849   char               vendor_id[VENDOR_LENGTH];
2850   const char*        family = nullptr;
2851   const char*        model = nullptr;
2852   const char*        brand = nullptr;
2853   int                outputLen = 0;
2854 
2855   family = cpu_family_description();
2856   if (family == nullptr) {
2857     family = unknown;
2858   }
2859 
2860   model = cpu_model_description();
2861   if (model == nullptr) {
2862     model = unknown;
2863   }
2864 
2865   brand = cpu_brand_string();
2866 
2867   if (brand == nullptr) {
2868     brand = cpu_brand();
2869     if (brand == nullptr) {
2870       brand = unknown;
2871     }
2872   }
2873 
2874   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2875   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2876   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2877   vendor_id[VENDOR_LENGTH-1] = '\0';
2878 
2879   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2880     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2881     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2882     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2883     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2884     "Supports: ",
2885     brand,
2886     vendor_id,
2887     family,
2888     extended_cpu_family(),
2889     model,
2890     extended_cpu_model(),
2891     cpu_stepping(),
2892     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2893     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2894     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2895     _cpuid_info.std_cpuid1_eax.value,
2896     _cpuid_info.std_cpuid1_ebx.value,
2897     _cpuid_info.std_cpuid1_ecx.value,
2898     _cpuid_info.std_cpuid1_edx.value,
2899     _cpuid_info.ext_cpuid1_eax,
2900     _cpuid_info.ext_cpuid1_ebx,
2901     _cpuid_info.ext_cpuid1_ecx,
2902     _cpuid_info.ext_cpuid1_edx);
2903 
2904   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2905     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2906     return OS_ERR;
2907   }
2908 
2909   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2910 
2911   return OS_OK;
2912 }
2913 
2914 
// Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  // Estimate thread/core/socket counts first; the descriptions below
  // (indirectly) rely on them.
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2926 
2927 /**
2928  *  For information about extracting the frequency from the cpu brand string, please see:
2929  *
2930  *    Intel Processor Identification and the CPUID Instruction
2931  *    Application Note 485
2932  *    May 2012
2933  *
2934  * The return value is the frequency in Hz.
2935  */
2936 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2937   const char* const brand_string = cpu_brand_string();
2938   if (brand_string == nullptr) {
2939     return 0;
2940   }
2941   const int64_t MEGA = 1000000;
2942   int64_t multiplier = 0;
2943   int64_t frequency = 0;
2944   uint8_t idx = 0;
2945   // The brand string buffer is at most 48 bytes.
2946   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2947   for (; idx < 48-2; ++idx) {
2948     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2949     // Search brand string for "yHz" where y is M, G, or T.
2950     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2951       if (brand_string[idx] == 'M') {
2952         multiplier = MEGA;
2953       } else if (brand_string[idx] == 'G') {
2954         multiplier = MEGA * 1000;
2955       } else if (brand_string[idx] == 'T') {
2956         multiplier = MEGA * MEGA;
2957       }
2958       break;
2959     }
2960   }
2961   if (multiplier > 0) {
2962     // Compute frequency (in Hz) from brand string.
2963     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2964       frequency =  (brand_string[idx-4] - '0') * multiplier;
2965       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2966       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2967     } else { // format is "xxxx"
2968       frequency =  (brand_string[idx-4] - '0') * 1000;
2969       frequency += (brand_string[idx-3] - '0') * 100;
2970       frequency += (brand_string[idx-2] - '0') * 10;
2971       frequency += (brand_string[idx-1] - '0');
2972       frequency *= multiplier;
2973     }
2974   }
2975   return frequency;
2976 }
2977 
2978 
// Return the qualified cpu frequency (Hz) parsed from the brand string,
// lazily computed and cached; 0 means it could not be determined (and the
// computation will be retried on the next call).
int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
  if (_max_qualified_cpu_frequency == 0) {
    _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
  }
  return _max_qualified_cpu_frequency;
}
2985 
2986 uint64_t VM_Version::CpuidInfo::feature_flags() const {
2987   uint64_t result = 0;
2988   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2989     result |= CPU_CX8;
2990   if (std_cpuid1_edx.bits.cmov != 0)
2991     result |= CPU_CMOV;
2992   if (std_cpuid1_edx.bits.clflush != 0)
2993     result |= CPU_FLUSH;
2994 #ifdef _LP64
2995   // clflush should always be available on x86_64
2996   // if not we are in real trouble because we rely on it
2997   // to flush the code cache.
2998   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2999 #endif
3000   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
3001       ext_cpuid1_edx.bits.fxsr != 0))
3002     result |= CPU_FXSR;
3003   // HT flag is set for multi-core processors also.
3004   if (threads_per_core() > 1)
3005     result |= CPU_HT;
3006   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
3007       ext_cpuid1_edx.bits.mmx != 0))
3008     result |= CPU_MMX;
3009   if (std_cpuid1_edx.bits.sse != 0)
3010     result |= CPU_SSE;
3011   if (std_cpuid1_edx.bits.sse2 != 0)
3012     result |= CPU_SSE2;
3013   if (std_cpuid1_ecx.bits.sse3 != 0)
3014     result |= CPU_SSE3;
3015   if (std_cpuid1_ecx.bits.ssse3 != 0)
3016     result |= CPU_SSSE3;
3017   if (std_cpuid1_ecx.bits.sse4_1 != 0)
3018     result |= CPU_SSE4_1;
3019   if (std_cpuid1_ecx.bits.sse4_2 != 0)
3020     result |= CPU_SSE4_2;
3021   if (std_cpuid1_ecx.bits.popcnt != 0)
3022     result |= CPU_POPCNT;
3023   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
3024       xem_xcr0_eax.bits.apx_f != 0) {
3025     result |= CPU_APX_F;
3026   }
3027   if (std_cpuid1_ecx.bits.avx != 0 &&
3028       std_cpuid1_ecx.bits.osxsave != 0 &&
3029       xem_xcr0_eax.bits.sse != 0 &&
3030       xem_xcr0_eax.bits.ymm != 0) {
3031     result |= CPU_AVX;
3032     result |= CPU_VZEROUPPER;
3033     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
3034       result |= CPU_SHA512;
3035     if (std_cpuid1_ecx.bits.f16c != 0)
3036       result |= CPU_F16C;
3037     if (sef_cpuid7_ebx.bits.avx2 != 0) {
3038       result |= CPU_AVX2;
3039       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
3040         result |= CPU_AVX_IFMA;
3041     }
3042     if (sef_cpuid7_ecx.bits.gfni != 0)
3043         result |= CPU_GFNI;
3044     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
3045         xem_xcr0_eax.bits.opmask != 0 &&
3046         xem_xcr0_eax.bits.zmm512 != 0 &&
3047         xem_xcr0_eax.bits.zmm32 != 0) {
3048       result |= CPU_AVX512F;
3049       if (sef_cpuid7_ebx.bits.avx512cd != 0)
3050         result |= CPU_AVX512CD;
3051       if (sef_cpuid7_ebx.bits.avx512dq != 0)
3052         result |= CPU_AVX512DQ;
3053       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
3054         result |= CPU_AVX512_IFMA;
3055       if (sef_cpuid7_ebx.bits.avx512pf != 0)
3056         result |= CPU_AVX512PF;
3057       if (sef_cpuid7_ebx.bits.avx512er != 0)
3058         result |= CPU_AVX512ER;
3059       if (sef_cpuid7_ebx.bits.avx512bw != 0)
3060         result |= CPU_AVX512BW;
3061       if (sef_cpuid7_ebx.bits.avx512vl != 0)
3062         result |= CPU_AVX512VL;
3063       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
3064         result |= CPU_AVX512_VPOPCNTDQ;
3065       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
3066         result |= CPU_AVX512_VPCLMULQDQ;
3067       if (sef_cpuid7_ecx.bits.vaes != 0)
3068         result |= CPU_AVX512_VAES;
3069       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
3070         result |= CPU_AVX512_VNNI;
3071       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
3072         result |= CPU_AVX512_BITALG;
3073       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
3074         result |= CPU_AVX512_VBMI;
3075       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
3076         result |= CPU_AVX512_VBMI2;
3077     }
3078   }
3079   if (std_cpuid1_ecx.bits.hv != 0)
3080     result |= CPU_HV;
3081   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3082     result |= CPU_BMI1;
3083   if (std_cpuid1_edx.bits.tsc != 0)
3084     result |= CPU_TSC;
3085   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3086     result |= CPU_TSCINV_BIT;
3087   if (std_cpuid1_ecx.bits.aes != 0)
3088     result |= CPU_AES;
3089   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3090     result |= CPU_LZCNT;
3091   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3092     result |= CPU_3DNOW_PREFETCH;
3093   if (sef_cpuid7_ebx.bits.erms != 0)
3094     result |= CPU_ERMS;
3095   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3096     result |= CPU_FSRM;
3097   if (std_cpuid1_ecx.bits.clmul != 0)
3098     result |= CPU_CLMUL;
3099   if (sef_cpuid7_ebx.bits.rtm != 0)
3100     result |= CPU_RTM;
3101   if (sef_cpuid7_ebx.bits.adx != 0)
3102      result |= CPU_ADX;
3103   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3104     result |= CPU_BMI2;
3105   if (sef_cpuid7_ebx.bits.sha != 0)
3106     result |= CPU_SHA;
3107   if (std_cpuid1_ecx.bits.fma != 0)
3108     result |= CPU_FMA;
3109   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3110     result |= CPU_FLUSHOPT;
3111   if (sef_cpuid7_ebx.bits.clwb != 0)
3112     result |= CPU_CLWB;
3113   if (ext_cpuid1_edx.bits.rdtscp != 0)
3114     result |= CPU_RDTSCP;
3115   if (sef_cpuid7_ecx.bits.rdpid != 0)
3116     result |= CPU_RDPID;
3117 
3118   // AMD|Hygon additional features.
3119   if (is_amd_family()) {
3120     // PREFETCHW was checked above, check TDNOW here.
3121     if ((ext_cpuid1_edx.bits.tdnow != 0))
3122       result |= CPU_3DNOW_PREFETCH;
3123     if (ext_cpuid1_ecx.bits.sse4a != 0)
3124       result |= CPU_SSE4A;
3125   }
3126 
3127   // Intel additional features.
3128   if (is_intel()) {
3129     if (sef_cpuid7_edx.bits.serialize != 0)
3130       result |= CPU_SERIALIZE;
3131     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3132       result |= CPU_AVX512_FP16;
3133   }
3134 
3135   // ZX additional features.
3136   if (is_zx()) {
3137     // We do not know if these are supported by ZX, so we cannot trust
3138     // common CPUID bit for them.
3139     assert((result & CPU_CLWB) == 0, "Check if it is supported?");
3140     result &= ~CPU_CLWB;
3141   }
3142 
3143   // Protection key features.
3144   if (sef_cpuid7_ecx.bits.pku != 0) {
3145     result |= CPU_PKU;
3146   }
3147   if (sef_cpuid7_ecx.bits.ospke != 0) {
3148     result |= CPU_OSPKE;
3149   }
3150 
3151   // Control flow enforcement (CET) features.
3152   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3153     result |= CPU_CET_SS;
3154   }
3155   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3156     result |= CPU_CET_IBT;
3157   }
3158 
3159   // Composite features.
3160   if (supports_tscinv_bit() &&
3161       ((is_amd_family() && !is_amd_Barcelona()) ||
3162        is_intel_tsc_synched_at_init())) {
3163     result |= CPU_TSCINV;
3164   }
3165 
3166   return result;
3167 }
3168 
3169 bool VM_Version::os_supports_avx_vectors() {
3170   bool retVal = false;
3171   int nreg = 2 LP64_ONLY(+2);
3172   if (supports_evex()) {
3173     // Verify that OS save/restore all bits of EVEX registers
3174     // during signal processing.
3175     retVal = true;
3176     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3177       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3178         retVal = false;
3179         break;
3180       }
3181     }
3182   } else if (supports_avx()) {
3183     // Verify that OS save/restore all bits of AVX registers
3184     // during signal processing.
3185     retVal = true;
3186     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3187       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3188         retVal = false;
3189         break;
3190       }
3191     }
3192     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3193     if (retVal == false) {
3194       // Verify that OS save/restore all bits of EVEX registers
3195       // during signal processing.
3196       retVal = true;
3197       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3198         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3199           retVal = false;
3200           break;
3201         }
3202       }
3203     }
3204   }
3205   return retVal;
3206 }
3207 
3208 bool VM_Version::os_supports_apx_egprs() {
3209   if (!supports_apx_f()) {
3210     return false;
3211   }
3212   // Enable APX support for product builds after
3213   // completion of planned features listed in JDK-8329030.
3214 #if !defined(PRODUCT)
3215   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3216       _cpuid_info.apx_save[1] != egpr_test_value()) {
3217     return false;
3218   }
3219   return true;
3220 #else
3221   return false;
3222 #endif
3223 }
3224 
3225 uint VM_Version::cores_per_cpu() {
3226   uint result = 1;
3227   if (is_intel()) {
3228     bool supports_topology = supports_processor_topology();
3229     if (supports_topology) {
3230       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3231                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3232     }
3233     if (!supports_topology || result == 0) {
3234       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3235     }
3236   } else if (is_amd_family()) {
3237     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3238   } else if (is_zx()) {
3239     bool supports_topology = supports_processor_topology();
3240     if (supports_topology) {
3241       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3242                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3243     }
3244     if (!supports_topology || result == 0) {
3245       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3246     }
3247   }
3248   return result;
3249 }
3250 
3251 uint VM_Version::threads_per_core() {
3252   uint result = 1;
3253   if (is_intel() && supports_processor_topology()) {
3254     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3255   } else if (is_zx() && supports_processor_topology()) {
3256     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3257   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3258     if (cpu_family() >= 0x17) {
3259       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3260     } else {
3261       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3262                  cores_per_cpu();
3263     }
3264   }
3265   return (result == 0 ? 1 : result);
3266 }
3267 
3268 uint VM_Version::L1_line_size() {
3269   uint result = 0;
3270   if (is_intel()) {
3271     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3272   } else if (is_amd_family()) {
3273     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3274   } else if (is_zx()) {
3275     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3276   }
3277   if (result < 32) // not defined ?
3278     result = 32;   // 32 bytes by default on x86 and other x64
3279   return result;
3280 }
3281 
3282 bool VM_Version::is_intel_tsc_synched_at_init() {
3283   if (is_intel_family_core()) {
3284     uint32_t ext_model = extended_cpu_model();
3285     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3286         ext_model == CPU_MODEL_WESTMERE_EP    ||
3287         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3288         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3289       // <= 2-socket invariant tsc support. EX versions are usually used
3290       // in > 2-socket systems and likely don't synchronize tscs at
3291       // initialization.
3292       // Code that uses tsc values must be prepared for them to arbitrarily
3293       // jump forward or backward.
3294       return true;
3295     }
3296   }
3297   return false;
3298 }
3299 
3300 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3301   // Hardware prefetching (distance/size in bytes):
3302   // Pentium 3 -  64 /  32
3303   // Pentium 4 - 256 / 128
3304   // Athlon    -  64 /  32 ????
3305   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3306   // Core      - 128 /  64
3307   //
3308   // Software prefetching (distance in bytes / instruction with best score):
3309   // Pentium 3 - 128 / prefetchnta
3310   // Pentium 4 - 512 / prefetchnta
3311   // Athlon    - 128 / prefetchnta
3312   // Opteron   - 256 / prefetchnta
3313   // Core      - 256 / prefetchnta
3314   // It will be used only when AllocatePrefetchStyle > 0
3315 
3316   if (is_amd_family()) { // AMD | Hygon
3317     if (supports_sse2()) {
3318       return 256; // Opteron
3319     } else {
3320       return 128; // Athlon
3321     }
3322   } else { // Intel
3323     if (supports_sse3() && cpu_family() == 6) {
3324       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3325         return 192;
3326       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3327 #ifdef _LP64
3328         return 384;
3329 #else
3330         return 320;
3331 #endif
3332       }
3333     }
3334     if (supports_sse2()) {
3335       if (cpu_family() == 6) {
3336         return 256; // Pentium M, Core, Core2
3337       } else {
3338         return 512; // Pentium 4
3339       }
3340     } else {
3341       return 128; // Pentium 3 (and all other old CPUs)
3342     }
3343   }
3344 }
3345 
3346 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3347   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3348   switch (id) {
3349   case vmIntrinsics::_floatToFloat16:
3350   case vmIntrinsics::_float16ToFloat:
3351     if (!supports_float16()) {
3352       return false;
3353     }
3354     break;
3355   default:
3356     break;
3357   }
3358   return true;
3359 }