1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/powerOfTwo.hpp"
  42 #include "utilities/virtualizationSupport.hpp"
  43 
// Cached processor identification, filled in by get_processor_features()
// from the raw CPUID data below.
int VM_Version::_cpu;        // extended CPU family
int VM_Version::_model;      // extended CPU model
int VM_Version::_stepping;   // CPU stepping
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID results, populated by the generated get_cpu_info stub.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  49 
  50 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  51 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  52 #undef DECLARE_CPU_FEATURE_FLAG
  53 
// Addresses recorded while running the probe stubs; the signal handler uses
// them to recognize the expected SEGVs and resume at the continuation point.
// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code blob holding the generated CPU-probe stubs.
static BufferBlob* stub_blob;
static const int stub_size = 2000;

// C-callable signatures for the generated stubs.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
// Entry points into the generated stubs (set once the blob is generated).
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  74 
  75 #ifdef _LP64
  76 
  77 bool VM_Version::supports_clflush() {
  78   // clflush should always be available on x86_64
  79   // if not we are in real trouble because we rely on it
  80   // to flush the code cache.
  81   // Unfortunately, Assembler::clflush is currently called as part
  82   // of generation of the code cache flush routine. This happens
  83   // under Universe::init before the processor features are set
  84   // up. Assembler::flush calls this routine to check that clflush
  85   // is allowed. So, we give the caller a free pass if Universe init
  86   // is still in progress.
  87   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  88   return true;
  89 }
  90 #endif
  91 
// CPUID leaf (function) numbers used by the probe stubs below.
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves; _2.._4 together return the 48-byte processor brand string.
#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
 104 
// Generates the one-off assembly stubs run at VM startup to probe the
// processor: raw CPUID harvesting, OS save/restore checks for YMM/ZMM and
// APX extended-GPR state across signals, virtualization detection, and the
// processor brand string. The emitted code deliberately assumes nothing
// about CPU features, since it runs before any feature flags are known.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  // Returns a stub that zeroes the extended GPRs r16 and r31. Run before the
  // APX signal-handling test so that the test values observed afterwards must
  // have been restored by the OS, not merely left untouched.
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  // Returns a stub that fills in the CpuidInfo record passed as its single
  // argument: CPU family detection (386/486 fallbacks via EFLAGS probing),
  // the standard/extended CPUID leaves, XCR0, and the signal-based
  // save/restore probes for AVX/EVEX and (LP64, non-product) APX state.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits a vzeroupper into the current stub unless the CPU is a Xeon Phi
  // (model checks below), jumping to L_wrapup for non-Intel parts.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Returns a stub that executes CPUID for an arbitrary leaf (first arg)
  // and stores eax/ebx/ecx/edx into the 4-element array passed as the
  // second arg. Used for hypervisor/virtualization detection.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Returns a stub that reads the 48-byte processor brand string via the
  // extended CPUID leaves 0x80000002..0x80000004 into the CpuidInfo record
  // passed as its single argument (no-op on pre-CPUID 386/486 parts).
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 860 
 861 void VM_Version::get_processor_features() {
 862 
 863   _cpu = 4; // 486 by default
 864   _model = 0;
 865   _stepping = 0;
 866   _features = 0;
 867   _logical_processors_per_package = 1;
 868   // i486 internal cache is both I&D and has a 16-byte line size
 869   _L1_data_cache_line_size = 16;
 870 
 871   // Get raw processor info
 872 
 873   get_cpu_info_stub(&_cpuid_info);
 874 
 875   assert_is_initialized();
 876   _cpu = extended_cpu_family();
 877   _model = extended_cpu_model();
 878   _stepping = cpu_stepping();
 879 
 880   if (cpu_family() > 4) { // it supports CPUID
 881     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 882     _cpu_features = _features;   // Preserve features
 883     // Logical processors are only available on P4s and above,
 884     // and only if hyperthreading is available.
 885     _logical_processors_per_package = logical_processor_count();
 886     _L1_data_cache_line_size = L1_line_size();
 887   }
 888 
 889   // xchg and xadd instructions
 890   _supports_atomic_getset4 = true;
 891   _supports_atomic_getadd4 = true;
 892   LP64_ONLY(_supports_atomic_getset8 = true);
 893   LP64_ONLY(_supports_atomic_getadd8 = true);
 894 
 895 #ifdef _LP64
 896   // OS should support SSE for x64 and hardware should support at least SSE2.
 897   if (!VM_Version::supports_sse2()) {
 898     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 899   }
 900   // in 64 bit the use of SSE2 is the minimum
 901   if (UseSSE < 2) UseSSE = 2;
 902 #endif
 903 
 904 #ifdef AMD64
 905   // flush_icache_stub have to be generated first.
 906   // That is why Icache line size is hard coded in ICache class,
 907   // see icache_x86.hpp. It is also the reason why we can't use
 908   // clflush instruction in 32-bit VM since it could be running
 909   // on CPU which does not support it.
 910   //
 911   // The only thing we can do is to verify that flushed
 912   // ICache::line_size has correct value.
 913   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 914   // clflush_size is size in quadwords (8 bytes).
 915   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 916 #endif
 917 
 918 #ifdef _LP64
 919   // assigning this field effectively enables Unsafe.writebackMemory()
 920   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 921   // that is only implemented on x86_64 and only if the OS plays ball
 922   if (os::supports_map_sync()) {
 923     // publish data cache line flush size to generic field, otherwise
 924     // let if default to zero thereby disabling writeback
 925     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 926   }
 927 #endif
 928 
 929   // Check if processor has Intel Ecore
 930   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 931     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 932       _model == 0xCC || _model == 0xDD)) {
 933     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 934   }
 935 
 936   if (UseSSE < 4) {
 937     _features &= ~CPU_SSE4_1;
 938     _features &= ~CPU_SSE4_2;
 939   }
 940 
 941   if (UseSSE < 3) {
 942     _features &= ~CPU_SSE3;
 943     _features &= ~CPU_SSSE3;
 944     _features &= ~CPU_SSE4A;
 945   }
 946 
 947   if (UseSSE < 2)
 948     _features &= ~CPU_SSE2;
 949 
 950   if (UseSSE < 1)
 951     _features &= ~CPU_SSE;
 952 
 953   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 954   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 955     UseAVX = 0;
 956   }
 957 
 958   // UseSSE is set to the smaller of what hardware supports and what
 959   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 960   // older Pentiums which do not support it.
 961   int use_sse_limit = 0;
 962   if (UseSSE > 0) {
 963     if (UseSSE > 3 && supports_sse4_1()) {
 964       use_sse_limit = 4;
 965     } else if (UseSSE > 2 && supports_sse3()) {
 966       use_sse_limit = 3;
 967     } else if (UseSSE > 1 && supports_sse2()) {
 968       use_sse_limit = 2;
 969     } else if (UseSSE > 0 && supports_sse()) {
 970       use_sse_limit = 1;
 971     } else {
 972       use_sse_limit = 0;
 973     }
 974   }
 975   if (FLAG_IS_DEFAULT(UseSSE)) {
 976     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 977   } else if (UseSSE > use_sse_limit) {
 978     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 979     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 980   }
 981 
 982   // first try initial setting and detect what we can support
 983   int use_avx_limit = 0;
 984   if (UseAVX > 0) {
 985     if (UseSSE < 4) {
 986       // Don't use AVX if SSE is unavailable or has been disabled.
 987       use_avx_limit = 0;
 988     } else if (UseAVX > 2 && supports_evex()) {
 989       use_avx_limit = 3;
 990     } else if (UseAVX > 1 && supports_avx2()) {
 991       use_avx_limit = 2;
 992     } else if (UseAVX > 0 && supports_avx()) {
 993       use_avx_limit = 1;
 994     } else {
 995       use_avx_limit = 0;
 996     }
 997   }
 998   if (FLAG_IS_DEFAULT(UseAVX)) {
 999     // Don't use AVX-512 on older Skylakes unless explicitly requested.
1000     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1001       FLAG_SET_DEFAULT(UseAVX, 2);
1002     } else {
1003       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1004     }
1005   }
1006 
1007   if (UseAVX > use_avx_limit) {
1008     if (UseSSE < 4) {
1009       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1010     } else {
1011       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1012     }
1013     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1014   }
1015 
1016   if (UseAVX < 3) {
1017     _features &= ~CPU_AVX512F;
1018     _features &= ~CPU_AVX512DQ;
1019     _features &= ~CPU_AVX512CD;
1020     _features &= ~CPU_AVX512BW;
1021     _features &= ~CPU_AVX512VL;
1022     _features &= ~CPU_AVX512_VPOPCNTDQ;
1023     _features &= ~CPU_AVX512_VPCLMULQDQ;
1024     _features &= ~CPU_AVX512_VAES;
1025     _features &= ~CPU_AVX512_VNNI;
1026     _features &= ~CPU_AVX512_VBMI;
1027     _features &= ~CPU_AVX512_VBMI2;
1028     _features &= ~CPU_AVX512_BITALG;
1029     _features &= ~CPU_AVX512_IFMA;
1030     _features &= ~CPU_APX_F;
1031     _features &= ~CPU_AVX512_FP16;
1032   }
1033 
1034   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1035   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1036   if (UseAPX && !apx_supported) {
1037     warning("UseAPX is not supported on this CPU, setting it to false");
1038     FLAG_SET_DEFAULT(UseAPX, false);
1039   } else if (FLAG_IS_DEFAULT(UseAPX)) {
1040     FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
1041   }
1042 
1043   if (!UseAPX) {
1044     _features &= ~CPU_APX_F;
1045   }
1046 
1047   if (UseAVX < 2) {
1048     _features &= ~CPU_AVX2;
1049     _features &= ~CPU_AVX_IFMA;
1050   }
1051 
1052   if (UseAVX < 1) {
1053     _features &= ~CPU_AVX;
1054     _features &= ~CPU_VZEROUPPER;
1055     _features &= ~CPU_F16C;
1056     _features &= ~CPU_SHA512;
1057   }
1058 
1059   if (logical_processors_per_package() == 1) {
1060     // HT processor could be installed on a system which doesn't support HT.
1061     _features &= ~CPU_HT;
1062   }
1063 
1064   if (is_intel()) { // Intel cpus specific settings
1065     if (is_knights_family()) {
1066       _features &= ~CPU_VZEROUPPER;
1067       _features &= ~CPU_AVX512BW;
1068       _features &= ~CPU_AVX512VL;
1069       _features &= ~CPU_AVX512DQ;
1070       _features &= ~CPU_AVX512_VNNI;
1071       _features &= ~CPU_AVX512_VAES;
1072       _features &= ~CPU_AVX512_VPOPCNTDQ;
1073       _features &= ~CPU_AVX512_VPCLMULQDQ;
1074       _features &= ~CPU_AVX512_VBMI;
1075       _features &= ~CPU_AVX512_VBMI2;
1076       _features &= ~CPU_CLWB;
1077       _features &= ~CPU_FLUSHOPT;
1078       _features &= ~CPU_GFNI;
1079       _features &= ~CPU_AVX512_BITALG;
1080       _features &= ~CPU_AVX512_IFMA;
1081       _features &= ~CPU_AVX_IFMA;
1082       _features &= ~CPU_AVX512_FP16;
1083     }
1084   }
1085 
1086   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1087     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1088   } else {
1089     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1090   }
1091 
1092   char buf[1024];
1093   int res = jio_snprintf(
1094               buf, sizeof(buf),
1095               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1096               cores_per_cpu(), threads_per_core(),
1097               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1098   assert(res > 0, "not enough temporary space allocated");
1099   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1100 
1101   _features_string = os::strdup(buf);
1102 
1103   // Use AES instructions if available.
1104   if (supports_aes()) {
1105     if (FLAG_IS_DEFAULT(UseAES)) {
1106       FLAG_SET_DEFAULT(UseAES, true);
1107     }
1108     if (!UseAES) {
1109       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1110         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1111       }
1112       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1113     } else {
1114       if (UseSSE > 2) {
1115         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1116           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1117         }
1118       } else {
1119         // The AES intrinsic stubs require AES instruction support (of course)
1120         // but also require sse3 mode or higher for instructions it use.
1121         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1122           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1123         }
1124         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1125       }
1126 
1127       // --AES-CTR begins--
1128       if (!UseAESIntrinsics) {
1129         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1130           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1131           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1132         }
1133       } else {
1134         if (supports_sse4_1()) {
1135           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1136             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1137           }
1138         } else {
1139            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1140            // but also require sse4.1 mode or higher for instructions it use.
1141           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1142              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1143            }
1144            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1145         }
1146       }
1147       // --AES-CTR ends--
1148     }
1149   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1150     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1151       warning("AES instructions are not available on this CPU");
1152       FLAG_SET_DEFAULT(UseAES, false);
1153     }
1154     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1155       warning("AES intrinsics are not available on this CPU");
1156       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1157     }
1158     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1159       warning("AES-CTR intrinsics are not available on this CPU");
1160       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1161     }
1162   }
1163 
1164   // Use CLMUL instructions if available.
1165   if (supports_clmul()) {
1166     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1167       UseCLMUL = true;
1168     }
1169   } else if (UseCLMUL) {
1170     if (!FLAG_IS_DEFAULT(UseCLMUL))
1171       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1172     FLAG_SET_DEFAULT(UseCLMUL, false);
1173   }
1174 
1175   if (UseCLMUL && (UseSSE > 2)) {
1176     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1177       UseCRC32Intrinsics = true;
1178     }
1179   } else if (UseCRC32Intrinsics) {
1180     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1181       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1182     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1183   }
1184 
1185 #ifdef _LP64
1186   if (supports_avx2()) {
1187     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1188       UseAdler32Intrinsics = true;
1189     }
1190   } else if (UseAdler32Intrinsics) {
1191     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1192       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1193     }
1194     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1195   }
1196 #else
1197   if (UseAdler32Intrinsics) {
1198     warning("Adler32Intrinsics not available on this CPU.");
1199     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1200   }
1201 #endif
1202 
1203   if (supports_sse4_2() && supports_clmul()) {
1204     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1205       UseCRC32CIntrinsics = true;
1206     }
1207   } else if (UseCRC32CIntrinsics) {
1208     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1209       warning("CRC32C intrinsics are not available on this CPU");
1210     }
1211     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1212   }
1213 
1214   // GHASH/GCM intrinsics
1215   if (UseCLMUL && (UseSSE > 2)) {
1216     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1217       UseGHASHIntrinsics = true;
1218     }
1219   } else if (UseGHASHIntrinsics) {
1220     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1221       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1222     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1223   }
1224 
1225 #ifdef _LP64
1226   // ChaCha20 Intrinsics
1227   // As long as the system supports AVX as a baseline we can do a
1228   // SIMD-enabled block function.  StubGenerator makes the determination
1229   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1230   // version.
1231   if (UseAVX >= 1) {
1232       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1233           UseChaCha20Intrinsics = true;
1234       }
1235   } else if (UseChaCha20Intrinsics) {
1236       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1237           warning("ChaCha20 intrinsic requires AVX instructions");
1238       }
1239       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1240   }
1241 #else
1242   // No support currently for ChaCha20 intrinsics on 32-bit platforms
1243   if (UseChaCha20Intrinsics) {
1244       warning("ChaCha20 intrinsics are not available on this CPU.");
1245       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1246   }
1247 #endif // _LP64
1248 
1249   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1250   if (UseAVX >= 2) {
1251     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1252       UseBASE64Intrinsics = true;
1253     }
1254   } else if (UseBASE64Intrinsics) {
1255      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1256       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1257     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1258   }
1259 
1260   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1261     if (FLAG_IS_DEFAULT(UseFMA)) {
1262       UseFMA = true;
1263     }
1264   } else if (UseFMA) {
1265     warning("FMA instructions are not available on this CPU");
1266     FLAG_SET_DEFAULT(UseFMA, false);
1267   }
1268 
1269   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1270     UseMD5Intrinsics = true;
1271   }
1272 
1273   if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
1274     if (FLAG_IS_DEFAULT(UseSHA)) {
1275       UseSHA = true;
1276     }
1277   } else if (UseSHA) {
1278     warning("SHA instructions are not available on this CPU");
1279     FLAG_SET_DEFAULT(UseSHA, false);
1280   }
1281 
1282   if (supports_sha() && supports_sse4_1() && UseSHA) {
1283     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1284       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1285     }
1286   } else if (UseSHA1Intrinsics) {
1287     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1288     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1289   }
1290 
1291   if (supports_sse4_1() && UseSHA) {
1292     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1293       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1294     }
1295   } else if (UseSHA256Intrinsics) {
1296     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1297     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1298   }
1299 
1300 #ifdef _LP64
1301   // These are only supported on 64-bit
1302   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1303     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1304       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1305     }
1306   } else
1307 #endif
1308   if (UseSHA512Intrinsics) {
1309     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1310     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1311   }
1312 
1313 #ifdef _LP64
1314   if (supports_evex() && supports_avx512bw()) {
1315       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1316           UseSHA3Intrinsics = true;
1317       }
1318   } else
1319 #endif
1320    if (UseSHA3Intrinsics) {
1321       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1322       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1323   }
1324 
1325   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1326     FLAG_SET_DEFAULT(UseSHA, false);
1327   }
1328 
1329 #ifdef COMPILER2
1330   if (UseFPUForSpilling) {
1331     if (UseSSE < 2) {
1332       // Only supported with SSE2+
1333       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1334     }
1335   }
1336 #endif
1337 
1338 #if COMPILER2_OR_JVMCI
1339   int max_vector_size = 0;
1340   if (UseSSE < 2) {
1341     // Vectors (in XMM) are only supported with SSE2+
1342     // SSE is always 2 on x64.
1343     max_vector_size = 0;
1344   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1345     // 16 byte vectors (in XMM) are supported with SSE2+
1346     max_vector_size = 16;
1347   } else if (UseAVX == 1 || UseAVX == 2) {
1348     // 32 bytes vectors (in YMM) are only supported with AVX+
1349     max_vector_size = 32;
1350   } else if (UseAVX > 2) {
1351     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1352     max_vector_size = 64;
1353   }
1354 
1355 #ifdef _LP64
1356   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1357 #else
1358   int min_vector_size = 0;
1359 #endif
1360 
1361   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1362     if (MaxVectorSize < min_vector_size) {
1363       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1364       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1365     }
1366     if (MaxVectorSize > max_vector_size) {
1367       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1368       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1369     }
1370     if (!is_power_of_2(MaxVectorSize)) {
1371       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1372       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1373     }
1374   } else {
1375     // If default, use highest supported configuration
1376     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1377   }
1378 
1379 #if defined(COMPILER2) && defined(ASSERT)
1380   if (MaxVectorSize > 0) {
1381     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1382       tty->print_cr("State of YMM registers after signal handle:");
1383       int nreg = 2 LP64_ONLY(+2);
1384       const char* ymm_name[4] = {"0", "7", "8", "15"};
1385       for (int i = 0; i < nreg; i++) {
1386         tty->print("YMM%s:", ymm_name[i]);
1387         for (int j = 7; j >=0; j--) {
1388           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1389         }
1390         tty->cr();
1391       }
1392     }
1393   }
1394 #endif // COMPILER2 && ASSERT
1395 
1396 #ifdef _LP64
1397   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1398     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1399       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1400     }
1401   } else
1402 #endif
1403   if (UsePoly1305Intrinsics) {
1404     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1405     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1406   }
1407 
1408 #ifdef _LP64
1409   if (supports_avx512ifma() && supports_avx512vlbw()) {
1410     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1411       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1412     }
1413   } else
1414 #endif
1415   if (UseIntPolyIntrinsics) {
1416     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1417     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1418   }
1419 
1420 #ifdef _LP64
1421   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1422     UseMultiplyToLenIntrinsic = true;
1423   }
1424   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1425     UseSquareToLenIntrinsic = true;
1426   }
1427   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1428     UseMulAddIntrinsic = true;
1429   }
1430   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1431     UseMontgomeryMultiplyIntrinsic = true;
1432   }
1433   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1434     UseMontgomerySquareIntrinsic = true;
1435   }
1436 #else
1437   if (UseMultiplyToLenIntrinsic) {
1438     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1439       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1440     }
1441     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1442   }
1443   if (UseMontgomeryMultiplyIntrinsic) {
1444     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1445       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1446     }
1447     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1448   }
1449   if (UseMontgomerySquareIntrinsic) {
1450     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1451       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1452     }
1453     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1454   }
1455   if (UseSquareToLenIntrinsic) {
1456     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1457       warning("squareToLen intrinsic is not available in 32-bit VM");
1458     }
1459     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1460   }
1461   if (UseMulAddIntrinsic) {
1462     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1463       warning("mulAdd intrinsic is not available in 32-bit VM");
1464     }
1465     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1466   }
1467 #endif // _LP64
1468 #endif // COMPILER2_OR_JVMCI
1469 
1470   // On new cpus instructions which update whole XMM register should be used
1471   // to prevent partial register stall due to dependencies on high half.
1472   //
1473   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1474   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1475   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1476   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1477 
1478 
1479   if (is_zx()) { // ZX cpus specific settings
1480     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1481       UseStoreImmI16 = false; // don't use it on ZX cpus
1482     }
1483     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1484       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1485         // Use it on all ZX cpus
1486         UseAddressNop = true;
1487       }
1488     }
1489     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1490       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1491     }
1492     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1493       if (supports_sse3()) {
1494         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1495       } else {
1496         UseXmmRegToRegMoveAll = false;
1497       }
1498     }
1499     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1500 #ifdef COMPILER2
1501       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1502         // For new ZX cpus do the next optimization:
1503         // don't align the beginning of a loop if there are enough instructions
1504         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1505         // in current fetch line (OptoLoopAlignment) or the padding
1506         // is big (> MaxLoopPad).
1507         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1508         // generated NOP instructions. 11 is the largest size of one
1509         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1510         MaxLoopPad = 11;
1511       }
1512 #endif // COMPILER2
1513       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1514         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1515       }
1516       if (supports_sse4_2()) { // new ZX cpus
1517         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1518           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1519         }
1520       }
1521       if (supports_sse4_2()) {
1522         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1523           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1524         }
1525       } else {
1526         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1527           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1528         }
1529         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1530       }
1531     }
1532 
1533     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1534       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1535     }
1536   }
1537 
1538   if (is_amd_family()) { // AMD cpus specific settings
1539     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1540       // Use it on new AMD cpus starting from Opteron.
1541       UseAddressNop = true;
1542     }
1543     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1544       // Use it on new AMD cpus starting from Opteron.
1545       UseNewLongLShift = true;
1546     }
1547     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1548       if (supports_sse4a()) {
1549         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1550       } else {
1551         UseXmmLoadAndClearUpper = false;
1552       }
1553     }
1554     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1555       if (supports_sse4a()) {
1556         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1557       } else {
1558         UseXmmRegToRegMoveAll = false;
1559       }
1560     }
1561     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1562       if (supports_sse4a()) {
1563         UseXmmI2F = true;
1564       } else {
1565         UseXmmI2F = false;
1566       }
1567     }
1568     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1569       if (supports_sse4a()) {
1570         UseXmmI2D = true;
1571       } else {
1572         UseXmmI2D = false;
1573       }
1574     }
1575     if (supports_sse4_2()) {
1576       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1577         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1578       }
1579     } else {
1580       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1581         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1582       }
1583       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1584     }
1585 
1586     // some defaults for AMD family 15h
1587     if (cpu_family() == 0x15) {
1588       // On family 15h processors default is no sw prefetch
1589       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1590         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1591       }
1592       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1593       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1594         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1595       }
1596       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1597       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1598         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1599       }
1600       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1601         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1602       }
1603     }
1604 
1605 #ifdef COMPILER2
1606     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1607       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1608       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1609     }
1610 #endif // COMPILER2
1611 
1612     // Some defaults for AMD family >= 17h && Hygon family 18h
1613     if (cpu_family() >= 0x17) {
1614       // On family >=17h processors use XMM and UnalignedLoadStores
1615       // for Array Copy
1616       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1617         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1618       }
1619       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1620         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1621       }
1622 #ifdef COMPILER2
1623       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1624         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1625       }
1626 #endif
1627     }
1628   }
1629 
1630   if (is_intel()) { // Intel cpus specific settings
1631     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1632       UseStoreImmI16 = false; // don't use it on Intel cpus
1633     }
1634     if (cpu_family() == 6 || cpu_family() == 15) {
1635       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1636         // Use it on all Intel cpus starting from PentiumPro
1637         UseAddressNop = true;
1638       }
1639     }
1640     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1641       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1642     }
1643     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1644       if (supports_sse3()) {
1645         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1646       } else {
1647         UseXmmRegToRegMoveAll = false;
1648       }
1649     }
1650     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1651 #ifdef COMPILER2
1652       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1653         // For new Intel cpus do the next optimization:
1654         // don't align the beginning of a loop if there are enough instructions
1655         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1656         // in current fetch line (OptoLoopAlignment) or the padding
1657         // is big (> MaxLoopPad).
1658         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1659         // generated NOP instructions. 11 is the largest size of one
1660         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1661         MaxLoopPad = 11;
1662       }
1663 #endif // COMPILER2
1664 
1665       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1666         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1667       }
1668       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1669         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1670           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1671         }
1672       }
1673       if (supports_sse4_2()) {
1674         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1675           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1676         }
1677       } else {
1678         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1679           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1680         }
1681         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1682       }
1683     }
1684     if (is_atom_family() || is_knights_family()) {
1685 #ifdef COMPILER2
1686       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1687         OptoScheduling = true;
1688       }
1689 #endif
1690       if (supports_sse4_2()) { // Silvermont
1691         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1692           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1693         }
1694       }
1695       if (FLAG_IS_DEFAULT(UseIncDec)) {
1696         FLAG_SET_DEFAULT(UseIncDec, false);
1697       }
1698     }
1699     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1700       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1701     }
1702 #ifdef COMPILER2
1703     if (UseAVX > 2) {
1704       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1705           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1706            ArrayOperationPartialInlineSize != 0 &&
1707            ArrayOperationPartialInlineSize != 16 &&
1708            ArrayOperationPartialInlineSize != 32 &&
1709            ArrayOperationPartialInlineSize != 64)) {
1710         int inline_size = 0;
1711         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1712           inline_size = 64;
1713         } else if (MaxVectorSize >= 32) {
1714           inline_size = 32;
1715         } else if (MaxVectorSize >= 16) {
1716           inline_size = 16;
1717         }
1718         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1719           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1720         }
1721         ArrayOperationPartialInlineSize = inline_size;
1722       }
1723 
1724       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1725         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1726         if (ArrayOperationPartialInlineSize) {
1727           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1728         } else {
1729           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1730         }
1731       }
1732     }
1733 #endif
1734   }
1735 
1736 #ifdef COMPILER2
1737   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1738     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1739       OptimizeFill = false;
1740     }
1741   }
1742 #endif
1743 
1744 #ifdef _LP64
1745   if (UseSSE42Intrinsics) {
1746     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1747       UseVectorizedMismatchIntrinsic = true;
1748     }
1749   } else if (UseVectorizedMismatchIntrinsic) {
1750     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1751       warning("vectorizedMismatch intrinsics are not available on this CPU");
1752     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1753   }
1754   if (UseAVX >= 2) {
1755     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1756   } else if (UseVectorizedHashCodeIntrinsic) {
1757     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1758       warning("vectorizedHashCode intrinsics are not available on this CPU");
1759     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1760   }
1761 #else
1762   if (UseVectorizedMismatchIntrinsic) {
1763     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1764       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1765     }
1766     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1767   }
1768   if (UseVectorizedHashCodeIntrinsic) {
1769     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1770       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1771     }
1772     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1773   }
1774 #endif // _LP64
1775 
1776   // Use count leading zeros count instruction if available.
1777   if (supports_lzcnt()) {
1778     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1779       UseCountLeadingZerosInstruction = true;
1780     }
1781    } else if (UseCountLeadingZerosInstruction) {
1782     warning("lzcnt instruction is not available on this CPU");
1783     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1784   }
1785 
1786   // Use count trailing zeros instruction if available
1787   if (supports_bmi1()) {
1788     // tzcnt does not require VEX prefix
1789     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1790       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1791         // Don't use tzcnt if BMI1 is switched off on command line.
1792         UseCountTrailingZerosInstruction = false;
1793       } else {
1794         UseCountTrailingZerosInstruction = true;
1795       }
1796     }
1797   } else if (UseCountTrailingZerosInstruction) {
1798     warning("tzcnt instruction is not available on this CPU");
1799     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1800   }
1801 
1802   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1803   // VEX prefix is generated only when AVX > 0.
1804   if (supports_bmi1() && supports_avx()) {
1805     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1806       UseBMI1Instructions = true;
1807     }
1808   } else if (UseBMI1Instructions) {
1809     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1810     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1811   }
1812 
1813   if (supports_bmi2() && supports_avx()) {
1814     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1815       UseBMI2Instructions = true;
1816     }
1817   } else if (UseBMI2Instructions) {
1818     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1819     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1820   }
1821 
1822   // Use population count instruction if available.
1823   if (supports_popcnt()) {
1824     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1825       UsePopCountInstruction = true;
1826     }
1827   } else if (UsePopCountInstruction) {
1828     warning("POPCNT instruction is not available on this CPU");
1829     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1830   }
1831 
1832   // Use fast-string operations if available.
1833   if (supports_erms()) {
1834     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1835       UseFastStosb = true;
1836     }
1837   } else if (UseFastStosb) {
1838     warning("fast-string operations are not available on this CPU");
1839     FLAG_SET_DEFAULT(UseFastStosb, false);
1840   }
1841 
1842   // For AMD Processors use XMM/YMM MOVDQU instructions
1843   // for Object Initialization as default
1844   if (is_amd() && cpu_family() >= 0x19) {
1845     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1846       UseFastStosb = false;
1847     }
1848   }
1849 
1850 #ifdef COMPILER2
1851   if (is_intel() && MaxVectorSize > 16) {
1852     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1853       UseFastStosb = false;
1854     }
1855   }
1856 #endif
1857 
1858   // Use XMM/YMM MOVDQU instruction for Object Initialization
1859   if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1860     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1861       UseXMMForObjInit = true;
1862     }
1863   } else if (UseXMMForObjInit) {
1864     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1865     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1866   }
1867 
1868 #ifdef COMPILER2
1869   if (FLAG_IS_DEFAULT(AlignVector)) {
1870     // Modern processors allow misaligned memory operations for vectors.
1871     AlignVector = !UseUnalignedLoadStores;
1872   }
1873 #endif // COMPILER2
1874 
1875   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1876     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1877       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1878     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1879       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1880     }
1881   }
1882 
1883   // Allocation prefetch settings
1884   int cache_line_size = checked_cast<int>(prefetch_data_size());
1885   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1886       (cache_line_size > AllocatePrefetchStepSize)) {
1887     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1888   }
1889 
1890   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1891     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1892     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1893       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1894     }
1895     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1896   }
1897 
1898   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1899     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1900     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1901   }
1902 
1903   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1904     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1905         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1906       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1907     }
1908 #ifdef COMPILER2
1909     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1910       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1911     }
1912 #endif
1913   }
1914 
1915   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1916 #ifdef COMPILER2
1917     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1918       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1919     }
1920 #endif
1921   }
1922 
1923 #ifdef _LP64
1924   // Prefetch settings
1925 
1926   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1927   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1928   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1929   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1930 
1931   // gc copy/scan is disabled if prefetchw isn't supported, because
1932   // Prefetch::write emits an inlined prefetchw on Linux.
1933   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1934   // The used prefetcht0 instruction works for both amd64 and em64t.
1935 
1936   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1937     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1938   }
1939   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1940     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1941   }
1942 #endif
1943 
1944   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1945      (cache_line_size > ContendedPaddingWidth))
1946      ContendedPaddingWidth = cache_line_size;
1947 
1948   // This machine allows unaligned memory accesses
1949   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1950     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1951   }
1952 
1953 #ifndef PRODUCT
1954   if (log_is_enabled(Info, os, cpu)) {
1955     LogStream ls(Log(os, cpu)::info());
1956     outputStream* log = &ls;
1957     log->print_cr("Logical CPUs per core: %u",
1958                   logical_processors_per_package());
1959     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1960     log->print("UseSSE=%d", UseSSE);
1961     if (UseAVX > 0) {
1962       log->print("  UseAVX=%d", UseAVX);
1963     }
1964     if (UseAES) {
1965       log->print("  UseAES=1");
1966     }
1967 #ifdef COMPILER2
1968     if (MaxVectorSize > 0) {
1969       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1970     }
1971 #endif
1972     log->cr();
1973     log->print("Allocation");
1974     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1975       log->print_cr(": no prefetching");
1976     } else {
1977       log->print(" prefetching: ");
1978       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1979         log->print("PREFETCHW");
1980       } else if (UseSSE >= 1) {
1981         if (AllocatePrefetchInstr == 0) {
1982           log->print("PREFETCHNTA");
1983         } else if (AllocatePrefetchInstr == 1) {
1984           log->print("PREFETCHT0");
1985         } else if (AllocatePrefetchInstr == 2) {
1986           log->print("PREFETCHT2");
1987         } else if (AllocatePrefetchInstr == 3) {
1988           log->print("PREFETCHW");
1989         }
1990       }
1991       if (AllocatePrefetchLines > 1) {
1992         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1993       } else {
1994         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1995       }
1996     }
1997 
1998     if (PrefetchCopyIntervalInBytes > 0) {
1999       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
2000     }
2001     if (PrefetchScanIntervalInBytes > 0) {
2002       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
2003     }
2004     if (ContendedPaddingWidth > 0) {
2005       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
2006     }
2007   }
2008 #endif // !PRODUCT
2009   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
2010       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
2011   }
2012   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
2013       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
2014   }
2015 }
2016 
2017 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2018   VirtualizationType vrt = VM_Version::get_detected_virtualization();
2019   if (vrt == XenHVM) {
2020     st->print_cr("Xen hardware-assisted virtualization detected");
2021   } else if (vrt == KVM) {
2022     st->print_cr("KVM virtualization detected");
2023   } else if (vrt == VMWare) {
2024     st->print_cr("VMWare virtualization detected");
2025     VirtualizationSupport::print_virtualization_info(st);
2026   } else if (vrt == HyperV) {
2027     st->print_cr("Hyper-V virtualization detected");
2028   } else if (vrt == HyperVRole) {
2029     st->print_cr("Hyper-V role detected");
2030   }
2031 }
2032 
// Returns true iff the current CPU's model/stepping pair appears in Intel's
// published table of parts affected by the Jump Conditional Code (JCC) erratum.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2100 
2101 // On Xen, the cpuid instruction returns
2102 //  eax / registers[0]: Version of Xen
2103 //  ebx / registers[1]: chars 'XenV'
2104 //  ecx / registers[2]: chars 'MMXe'
2105 //  edx / registers[3]: chars 'nVMM'
2106 //
2107 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2108 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2109 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2110 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2111 //
// More information:
// https://kb.vmware.com/s/article/1009458
2114 //
// Probe the hypervisor CPUID leaves and record the detected hypervisor
// (VMware, Hyper-V, KVM, or Xen) in Abstract_VM_Version::_detected_virtualization.
void VM_Version::check_virtualizations() {
  uint32_t registers[4] = {0};
  char signature[13] = {0}; // 12 signature characters + NUL terminator

  // Xen cpuid leaves can be found 0x100 aligned boundary starting
  // from 0x40000000 until 0x40010000.
  //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
  for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
    detect_virt_stub(leaf, registers);
    // The 12-byte vendor signature is returned in ebx/ecx/edx, which occupy
    // the contiguous slots registers[1..3] of this array.
    memcpy(signature, &registers[1], 12);

    if (strncmp("VMwareVMware", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = VMWare;
      // check for extended metrics from guestlib
      VirtualizationSupport::initialize();
    } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = HyperV;
#ifdef _WINDOWS
      // CPUID leaf 0x40000007 is available to the root partition only.
      // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
      //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
      detect_virt_stub(0x40000007, registers);
      if ((registers[0] != 0x0) ||
          (registers[1] != 0x0) ||
          (registers[2] != 0x0) ||
          (registers[3] != 0x0)) {
        Abstract_VM_Version::_detected_virtualization = HyperVRole;
      }
#endif
    } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
      // KVM's signature is only 9 characters ("KVMKVMKVM\0\0\0").
      Abstract_VM_Version::_detected_virtualization = KVM;
    } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
      Abstract_VM_Version::_detected_virtualization = XenHVM;
    }
  }
}
2151 
2152 #ifdef COMPILER2
2153 // Determine if it's running on Cascade Lake using default options.
2154 bool VM_Version::is_default_intel_cascade_lake() {
2155   return FLAG_IS_DEFAULT(UseAVX) &&
2156          FLAG_IS_DEFAULT(MaxVectorSize) &&
2157          UseAVX > 2 &&
2158          is_intel_cascade_lake();
2159 }
2160 #endif
2161 
2162 bool VM_Version::is_intel_cascade_lake() {
2163   return is_intel_skylake() && _stepping >= 5;
2164 }
2165 
// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// Intel platforms that support the serialize instruction have an improved
// implementation of 64-byte load/stores, so the default threshold is set
// to 0 for those platforms.
2171 int VM_Version::avx3_threshold() {
2172   return (is_intel_family_core() &&
2173           supports_serialize() &&
2174           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2175 }
2176 
2177 #if defined(_LP64)
// Invoke the generated assembly stub (created in VM_Version::initialize())
// that clears the APX test state.
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2181 #endif
2182 
2183 static bool _vm_version_initialized = false;
2184 
// Generate the CPU-probing stubs, query processor features, and detect
// virtualization. The stub generation must precede any other assembler use.
void VM_Version::initialize() {
  ResourceMark rm;
  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Stub that collects CPUID information.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  // Stub that queries hypervisor CPUID leaves (used by check_virtualizations()).
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());

#if defined(_LP64)
  // 64-bit only: stub backing clear_apx_test_state().
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
#endif
  get_processor_features();

  LP64_ONLY(Assembler::precompute_instructions();)

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2213 
// Legacy x86 processor family ids (see cpu_family()).
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Extended EDX feature-flag bit masks (cf. _feature_extended_edx_id below).
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// EDX feature-flag bit masks; the bit positions correspond to the names in
// _feature_edx_id below.
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2259 
// Blob and size for the generated stub that fetches the CPUID brand string.
static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  // Signature of the generated brand-string stub.
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,  // size of _family_id_intel below
  ExtendedFamilyIdLength_AMD   = 24   // size of _family_id_amd below
};

const size_t VENDOR_LENGTH = 13;                   // 12 vendor characters + NUL
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); // presumably 3 CPUID leaves x 4 regs x 4 bytes + NUL — confirm against stub
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2282 
// Intel family id -> marketing name, indexed by family id (cf. FamilyFlag
// above, e.g. 0xF -> "Pentium 4"); empty strings are unnamed ids.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2301 
// AMD family id -> marketing name, indexed by family id (e.g. 0x17 -> "Zen");
// empty strings are unnamed ids.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Family-6 model id -> marketing name, indexed by model number (see the 0x..
// comments); empty strings are models without a listed name.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2404 
/* Brand ID is for backward compatibility.
 * Newer CPUs use the extended brand string instead. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2419 
2420 
// Human-readable EDX feature names, indexed by bit number (cf. FeatureEdxFlag
// above, e.g. bit 0 -> FPU); empty strings are reserved bits.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2455 
// Display names for extended-CPUID EDX feature bits (ext_cpuid1_edx);
// index equals bit position as scanned by cpu_write_support_string().
// Empty strings mark bits that are never printed.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2490 
// Display names for CPUID(1) ECX feature bits (std_cpuid1_ecx); index
// equals bit position as scanned by cpu_write_support_string().
// Empty strings mark bits that are never printed.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2525 
// Display names for extended-CPUID ECX feature bits (ext_cpuid1_ecx);
// index equals bit position as scanned by cpu_write_support_string().
// Empty strings mark bits that are never printed.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2560 
// Generate the stub routine used to fetch the processor brand string via
// CPUID, and record its entry point in getCPUIDBrandString_stub.
// NOTE(review): despite the name, the visible body only sets up the
// brand-string stub — confirm intent against the header declaration.
void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  // Allocate a dedicated blob for the generated code. Failure is fatal
  // because cpu_extended_brand_string() asserts the stub exists.
  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  // Generate the assembly and publish the callable entry point.
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                   g.generate_getCPUIDBrandString());
}
2573 
2574 const char* VM_Version::cpu_model_description(void) {
2575   uint32_t cpu_family = extended_cpu_family();
2576   uint32_t cpu_model = extended_cpu_model();
2577   const char* model = nullptr;
2578 
2579   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2580     for (uint32_t i = 0; i <= cpu_model; i++) {
2581       model = _model_id_pentium_pro[i];
2582       if (model == nullptr) {
2583         break;
2584       }
2585     }
2586   }
2587   return model;
2588 }
2589 
// Return the CPUID extended brand string, lazily fetched into a C-heap
// buffer on first call and cached for subsequent calls. Returns nullptr
// when allocation or the CPUID query fails; a failed query frees the
// buffer so a later call can retry.
const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr; // allocation failed
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      // Query failed: drop the cached buffer rather than keep stale data.
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}
2604 
2605 const char* VM_Version::cpu_brand(void) {
2606   const char*  brand  = nullptr;
2607 
2608   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2609     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2610     brand = _brand_id[0];
2611     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2612       brand = _brand_id[i];
2613     }
2614   }
2615   return brand;
2616 }
2617 
2618 bool VM_Version::cpu_is_em64t(void) {
2619   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2620 }
2621 
2622 bool VM_Version::is_netburst(void) {
2623   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2624 }
2625 
2626 bool VM_Version::supports_tscinv_ext(void) {
2627   if (!supports_tscinv_bit()) {
2628     return false;
2629   }
2630 
2631   if (is_intel()) {
2632     return true;
2633   }
2634 
2635   if (is_amd()) {
2636     return !is_amd_Barcelona();
2637   }
2638 
2639   if (is_hygon()) {
2640     return true;
2641   }
2642 
2643   return false;
2644 }
2645 
// Estimate _no_of_threads, _no_of_sockets and _no_of_cores from the
// OS-visible processor count and CPUID-derived per-package topology.
void VM_Version::resolve_cpu_information_details(void) {

  // in future we want to base this information on proper cpu
  // and cache topology enumeration such as:
  // Intel 64 Architecture Processor Topology Enumeration
  // which supports system cpu and cache topology enumeration
  // either using 2xAPICIDs or initial APICIDs

  // currently only rough cpu information estimates
  // which will not necessarily reflect the exact configuration of the system

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out number of threads per cpu package
  int threads_per_package = threads_per_core() * cores_per_cpu();

  // use amount of threads visible to the process in order to guess number of sockets
  // (integer division truncates any partially visible package)
  _no_of_sockets = _no_of_threads / threads_per_package;

  // process might only see a subset of the total number of threads
  // from a single processor package. Virtualization/resource management for example.
  // If so then just write a hard 1 as num of pkgs.
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}
2677 
2678 
2679 const char* VM_Version::cpu_family_description(void) {
2680   int cpu_family_id = extended_cpu_family();
2681   if (is_amd()) {
2682     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2683       return _family_id_amd[cpu_family_id];
2684     }
2685   }
2686   if (is_intel()) {
2687     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2688       return cpu_model_description();
2689     }
2690     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2691       return _family_id_intel[cpu_family_id];
2692     }
2693   }
2694   if (is_hygon()) {
2695     return "Dhyana";
2696   }
2697   return "Unknown x86";
2698 }
2699 
// Write a one-line CPU summary (vendor, family, key ISA extensions,
// 64-bit mode) into 'buf'. 'buf' must hold at least
// CPU_TYPE_DESC_BUF_SIZE bytes. Always returns OS_OK.
int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");

  const char* cpu_type = nullptr;
  const char* x64 = nullptr;

  // Pick the vendor name and the matching 64-bit suffix (empty when the
  // CPU does not report 64-bit support).
  if (is_intel()) {
    cpu_type = "Intel";
    x64 = cpu_is_em64t() ? " Intel64" : "";
  } else if (is_amd()) {
    cpu_type = "AMD";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else if (is_hygon()) {
    cpu_type = "Hygon";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else {
    cpu_type = "Unknown x86";
    x64 = cpu_is_em64t() ? " x86_64" : "";
  }

  // SSE and SSE2 are printed unconditionally; everything else only when
  // the corresponding supports_*() query is true.
  jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
    cpu_type,
    cpu_family_description(),
    supports_ht() ? " (HT)" : "",
    supports_sse3() ? " SSE3" : "",
    supports_ssse3() ? " SSSE3" : "",
    supports_sse4_1() ? " SSE4.1" : "",
    supports_sse4_2() ? " SSE4.2" : "",
    supports_sse4a() ? " SSE4A" : "",
    is_netburst() ? " Netburst" : "",
    is_intel_family_core() ? " Core" : "",
    x64);

  return OS_OK;
}
2736 
// Copy the 48-byte (12 x 4-byte) CPUID extended brand string into 'buf'.
// 'buf' must hold at least CPU_EBS_MAX_LENGTH bytes, and the fetch stub
// must have been generated first (see initialize_tsc()). Returns OS_OK.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer, one 4-byte CPUID register value at a time
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}
2761 
// Append a comma-separated list of supported feature names to 'buf',
// drawing names from the _feature_*_id tables indexed by bit position.
// Returns the number of characters written (buf_len - 1 if truncated).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

// Append 'string' preceded by the current separator; after the first
// write the separator becomes ", ". A negative jio_snprintf result
// (truncation) aborts with the maximum writable length.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // CPUID(1) EDX features, bits 0..29. 'fi' tracks the bit position used
  // to index the name table; unnamed (empty-string) bits are skipped.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID(1) ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Extended-CPUID ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Extended-CPUID EDX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Invariant TSC has its own dedicated query rather than a table slot.
  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}
2818 
2819 /**
2820  * Write a detailed description of the cpu to a given buffer, including
2821  * feature set.
2822  */
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char               vendor_id[VENDOR_LENGTH];
  const char*        family = nullptr;
  const char*        model = nullptr;
  const char*        brand = nullptr;
  int                outputLen = 0;

  // Any description we cannot resolve is printed as "<unknown>".
  family = cpu_family_description();
  if (family == nullptr) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == nullptr) {
    model = unknown;
  }

  // Prefer the extended brand string; fall back to the legacy brand id,
  // then to "<unknown>".
  brand = cpu_brand_string();

  if (brand == nullptr) {
    brand = cpu_brand();
    if (brand == nullptr) {
      brand = unknown;
    }
  }

  // Reassemble the 12-byte vendor string. Note the 0,2,1 field order —
  // presumably matching the register layout CPUID uses for the vendor id;
  // confirm against the CpuidInfo declaration.
  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  // Fail (with a terminated buffer) if the fixed-format header alone
  // did not fit.
  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  // Append the feature list after the header.
  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}
2892 
2893 
// Fill in Abstract_VM_Version statics: the thread/socket/core counts and
// the cpu_name/cpu_desc strings. Runs once, after VM_Version init.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2905 
2906 /**
2907  *  For information about extracting the frequency from the cpu brand string, please see:
2908  *
2909  *    Intel Processor Identification and the CPUID Instruction
2910  *    Application Note 485
2911  *    May 2012
2912  *
2913  * The return value is the frequency in Hz.
2914  */
2915 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2916   const char* const brand_string = cpu_brand_string();
2917   if (brand_string == nullptr) {
2918     return 0;
2919   }
2920   const int64_t MEGA = 1000000;
2921   int64_t multiplier = 0;
2922   int64_t frequency = 0;
2923   uint8_t idx = 0;
2924   // The brand string buffer is at most 48 bytes.
2925   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2926   for (; idx < 48-2; ++idx) {
2927     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2928     // Search brand string for "yHz" where y is M, G, or T.
2929     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2930       if (brand_string[idx] == 'M') {
2931         multiplier = MEGA;
2932       } else if (brand_string[idx] == 'G') {
2933         multiplier = MEGA * 1000;
2934       } else if (brand_string[idx] == 'T') {
2935         multiplier = MEGA * MEGA;
2936       }
2937       break;
2938     }
2939   }
2940   if (multiplier > 0) {
2941     // Compute frequency (in Hz) from brand string.
2942     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2943       frequency =  (brand_string[idx-4] - '0') * multiplier;
2944       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2945       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2946     } else { // format is "xxxx"
2947       frequency =  (brand_string[idx-4] - '0') * 1000;
2948       frequency += (brand_string[idx-3] - '0') * 100;
2949       frequency += (brand_string[idx-2] - '0') * 10;
2950       frequency += (brand_string[idx-1] - '0');
2951       frequency *= multiplier;
2952     }
2953   }
2954   return frequency;
2955 }
2956 
2957 
2958 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2959   if (_max_qualified_cpu_frequency == 0) {
2960     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2961   }
2962   return _max_qualified_cpu_frequency;
2963 }
2964 
2965 uint64_t VM_Version::CpuidInfo::feature_flags() const {
2966   uint64_t result = 0;
2967   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2968     result |= CPU_CX8;
2969   if (std_cpuid1_edx.bits.cmov != 0)
2970     result |= CPU_CMOV;
2971   if (std_cpuid1_edx.bits.clflush != 0)
2972     result |= CPU_FLUSH;
2973 #ifdef _LP64
2974   // clflush should always be available on x86_64
2975   // if not we are in real trouble because we rely on it
2976   // to flush the code cache.
2977   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2978 #endif
2979   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2980       ext_cpuid1_edx.bits.fxsr != 0))
2981     result |= CPU_FXSR;
2982   // HT flag is set for multi-core processors also.
2983   if (threads_per_core() > 1)
2984     result |= CPU_HT;
2985   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2986       ext_cpuid1_edx.bits.mmx != 0))
2987     result |= CPU_MMX;
2988   if (std_cpuid1_edx.bits.sse != 0)
2989     result |= CPU_SSE;
2990   if (std_cpuid1_edx.bits.sse2 != 0)
2991     result |= CPU_SSE2;
2992   if (std_cpuid1_ecx.bits.sse3 != 0)
2993     result |= CPU_SSE3;
2994   if (std_cpuid1_ecx.bits.ssse3 != 0)
2995     result |= CPU_SSSE3;
2996   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2997     result |= CPU_SSE4_1;
2998   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2999     result |= CPU_SSE4_2;
3000   if (std_cpuid1_ecx.bits.popcnt != 0)
3001     result |= CPU_POPCNT;
3002   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
3003       xem_xcr0_eax.bits.apx_f != 0) {
3004     result |= CPU_APX_F;
3005   }
3006   if (std_cpuid1_ecx.bits.avx != 0 &&
3007       std_cpuid1_ecx.bits.osxsave != 0 &&
3008       xem_xcr0_eax.bits.sse != 0 &&
3009       xem_xcr0_eax.bits.ymm != 0) {
3010     result |= CPU_AVX;
3011     result |= CPU_VZEROUPPER;
3012     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
3013       result |= CPU_SHA512;
3014     if (std_cpuid1_ecx.bits.f16c != 0)
3015       result |= CPU_F16C;
3016     if (sef_cpuid7_ebx.bits.avx2 != 0) {
3017       result |= CPU_AVX2;
3018       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
3019         result |= CPU_AVX_IFMA;
3020     }
3021     if (sef_cpuid7_ecx.bits.gfni != 0)
3022         result |= CPU_GFNI;
3023     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
3024         xem_xcr0_eax.bits.opmask != 0 &&
3025         xem_xcr0_eax.bits.zmm512 != 0 &&
3026         xem_xcr0_eax.bits.zmm32 != 0) {
3027       result |= CPU_AVX512F;
3028       if (sef_cpuid7_ebx.bits.avx512cd != 0)
3029         result |= CPU_AVX512CD;
3030       if (sef_cpuid7_ebx.bits.avx512dq != 0)
3031         result |= CPU_AVX512DQ;
3032       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
3033         result |= CPU_AVX512_IFMA;
3034       if (sef_cpuid7_ebx.bits.avx512pf != 0)
3035         result |= CPU_AVX512PF;
3036       if (sef_cpuid7_ebx.bits.avx512er != 0)
3037         result |= CPU_AVX512ER;
3038       if (sef_cpuid7_ebx.bits.avx512bw != 0)
3039         result |= CPU_AVX512BW;
3040       if (sef_cpuid7_ebx.bits.avx512vl != 0)
3041         result |= CPU_AVX512VL;
3042       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
3043         result |= CPU_AVX512_VPOPCNTDQ;
3044       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
3045         result |= CPU_AVX512_VPCLMULQDQ;
3046       if (sef_cpuid7_ecx.bits.vaes != 0)
3047         result |= CPU_AVX512_VAES;
3048       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
3049         result |= CPU_AVX512_VNNI;
3050       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
3051         result |= CPU_AVX512_BITALG;
3052       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
3053         result |= CPU_AVX512_VBMI;
3054       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
3055         result |= CPU_AVX512_VBMI2;
3056     }
3057   }
3058   if (std_cpuid1_ecx.bits.hv != 0)
3059     result |= CPU_HV;
3060   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3061     result |= CPU_BMI1;
3062   if (std_cpuid1_edx.bits.tsc != 0)
3063     result |= CPU_TSC;
3064   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3065     result |= CPU_TSCINV_BIT;
3066   if (std_cpuid1_ecx.bits.aes != 0)
3067     result |= CPU_AES;
3068   if (sef_cpuid7_ebx.bits.erms != 0)
3069     result |= CPU_ERMS;
3070   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3071     result |= CPU_FSRM;
3072   if (std_cpuid1_ecx.bits.clmul != 0)
3073     result |= CPU_CLMUL;
3074   if (sef_cpuid7_ebx.bits.rtm != 0)
3075     result |= CPU_RTM;
3076   if (sef_cpuid7_ebx.bits.adx != 0)
3077      result |= CPU_ADX;
3078   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3079     result |= CPU_BMI2;
3080   if (sef_cpuid7_ebx.bits.sha != 0)
3081     result |= CPU_SHA;
3082   if (std_cpuid1_ecx.bits.fma != 0)
3083     result |= CPU_FMA;
3084   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3085     result |= CPU_FLUSHOPT;
3086   if (ext_cpuid1_edx.bits.rdtscp != 0)
3087     result |= CPU_RDTSCP;
3088   if (sef_cpuid7_ecx.bits.rdpid != 0)
3089     result |= CPU_RDPID;
3090 
3091   // AMD|Hygon features.
3092   if (is_amd_family()) {
3093     if ((ext_cpuid1_edx.bits.tdnow != 0) ||
3094         (ext_cpuid1_ecx.bits.prefetchw != 0))
3095       result |= CPU_3DNOW_PREFETCH;
3096     if (ext_cpuid1_ecx.bits.lzcnt != 0)
3097       result |= CPU_LZCNT;
3098     if (ext_cpuid1_ecx.bits.sse4a != 0)
3099       result |= CPU_SSE4A;
3100   }
3101 
3102   // Intel features.
3103   if (is_intel()) {
3104     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3105       result |= CPU_LZCNT;
3106     }
3107     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3108       result |= CPU_3DNOW_PREFETCH;
3109     }
3110     if (sef_cpuid7_ebx.bits.clwb != 0) {
3111       result |= CPU_CLWB;
3112     }
3113     if (sef_cpuid7_edx.bits.serialize != 0)
3114       result |= CPU_SERIALIZE;
3115 
3116     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3117       result |= CPU_AVX512_FP16;
3118   }
3119 
3120   // ZX features.
3121   if (is_zx()) {
3122     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3123       result |= CPU_LZCNT;
3124     }
3125     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3126       result |= CPU_3DNOW_PREFETCH;
3127     }
3128   }
3129 
3130   // Protection key features.
3131   if (sef_cpuid7_ecx.bits.pku != 0) {
3132     result |= CPU_PKU;
3133   }
3134   if (sef_cpuid7_ecx.bits.ospke != 0) {
3135     result |= CPU_OSPKE;
3136   }
3137 
3138   // Control flow enforcement (CET) features.
3139   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3140     result |= CPU_CET_SS;
3141   }
3142   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3143     result |= CPU_CET_IBT;
3144   }
3145 
3146   // Composite features.
3147   if (supports_tscinv_bit() &&
3148       ((is_amd_family() && !is_amd_Barcelona()) ||
3149        is_intel_tsc_synched_at_init())) {
3150     result |= CPU_TSCINV;
3151   }
3152 
3153   return result;
3154 }
3155 
3156 bool VM_Version::os_supports_avx_vectors() {
3157   bool retVal = false;
3158   int nreg = 2 LP64_ONLY(+2);
3159   if (supports_evex()) {
3160     // Verify that OS save/restore all bits of EVEX registers
3161     // during signal processing.
3162     retVal = true;
3163     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3164       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3165         retVal = false;
3166         break;
3167       }
3168     }
3169   } else if (supports_avx()) {
3170     // Verify that OS save/restore all bits of AVX registers
3171     // during signal processing.
3172     retVal = true;
3173     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3174       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3175         retVal = false;
3176         break;
3177       }
3178     }
3179     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3180     if (retVal == false) {
3181       // Verify that OS save/restore all bits of EVEX registers
3182       // during signal processing.
3183       retVal = true;
3184       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3185         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3186           retVal = false;
3187           break;
3188         }
3189       }
3190     }
3191   }
3192   return retVal;
3193 }
3194 
3195 bool VM_Version::os_supports_apx_egprs() {
3196   if (!supports_apx_f()) {
3197     return false;
3198   }
3199   // Enable APX support for product builds after
3200   // completion of planned features listed in JDK-8329030.
3201 #if !defined(PRODUCT)
3202   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3203       _cpuid_info.apx_save[1] != egpr_test_value()) {
3204     return false;
3205   }
3206   return true;
3207 #else
3208   return false;
3209 #endif
3210 }
3211 
3212 uint VM_Version::cores_per_cpu() {
3213   uint result = 1;
3214   if (is_intel()) {
3215     bool supports_topology = supports_processor_topology();
3216     if (supports_topology) {
3217       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3218                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3219     }
3220     if (!supports_topology || result == 0) {
3221       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3222     }
3223   } else if (is_amd_family()) {
3224     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3225   } else if (is_zx()) {
3226     bool supports_topology = supports_processor_topology();
3227     if (supports_topology) {
3228       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3229                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3230     }
3231     if (!supports_topology || result == 0) {
3232       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3233     }
3234   }
3235   return result;
3236 }
3237 
// Number of hardware threads per physical core; always returns >= 1.
uint VM_Version::threads_per_core() {
  uint result = 1;
  if (is_intel() && supports_processor_topology()) {
    // Topology leaf level 0 reports logical processors per core directly.
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (is_zx() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    if (cpu_family() >= 0x17) {
      // Family 0x17+ (presumably AMD Zen — confirm against the AMD APM)
      // reports a zero-based threads-per-core count in ext_cpuid1E EBX.
      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    } else {
      // Older parts: derive from logical CPUs per package divided by
      // cores per package.
      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
                 cores_per_cpu();
    }
  }
  // Guard against a zero result from division or under-reporting.
  return (result == 0 ? 1 : result);
}
3254 
3255 uint VM_Version::L1_line_size() {
3256   uint result = 0;
3257   if (is_intel()) {
3258     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3259   } else if (is_amd_family()) {
3260     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3261   } else if (is_zx()) {
3262     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3263   }
3264   if (result < 32) // not defined ?
3265     result = 32;   // 32 bytes by default on x86 and other x64
3266   return result;
3267 }
3268 
3269 bool VM_Version::is_intel_tsc_synched_at_init() {
3270   if (is_intel_family_core()) {
3271     uint32_t ext_model = extended_cpu_model();
3272     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3273         ext_model == CPU_MODEL_WESTMERE_EP    ||
3274         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3275         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3276       // <= 2-socket invariant tsc support. EX versions are usually used
3277       // in > 2-socket systems and likely don't synchronize tscs at
3278       // initialization.
3279       // Code that uses tsc values must be prepared for them to arbitrarily
3280       // jump forward or backward.
3281       return true;
3282     }
3283   }
3284   return false;
3285 }
3286 
// Pick the software prefetch distance (in bytes) best suited to the
// detected CPU; consulted only when AllocatePrefetchStyle > 0.
int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
  // Hardware prefetching (distance/size in bytes):
  // Pentium 3 -  64 /  32
  // Pentium 4 - 256 / 128
  // Athlon    -  64 /  32 ????
  // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
  // Core      - 128 /  64
  //
  // Software prefetching (distance in bytes / instruction with best score):
  // Pentium 3 - 128 / prefetchnta
  // Pentium 4 - 512 / prefetchnta
  // Athlon    - 128 / prefetchnta
  // Opteron   - 256 / prefetchnta
  // Core      - 256 / prefetchnta
  // It will be used only when AllocatePrefetchStyle > 0

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    // Family 6 with SSE3 covers Core and newer; Nehalem-class parts
    // (SSE4.2 + HT) get their own tuning.
    if (supports_sse3() && cpu_family() == 6) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
#ifdef _LP64
        return 384;
#else
        return 320;
#endif
      }
    }
    if (supports_sse2()) {
      if (cpu_family() == 6) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}
3332 
3333 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3334   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3335   switch (id) {
3336   case vmIntrinsics::_floatToFloat16:
3337   case vmIntrinsics::_float16ToFloat:
3338     if (!supports_float16()) {
3339       return false;
3340     }
3341     break;
3342   default:
3343     break;
3344   }
3345   return true;
3346 }