1 /*
   2  * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "jvm.h"
  32 #include "logging/log.hpp"
  33 #include "logging/logStream.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "runtime/globals_extension.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/checkedCast.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_FLAG
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = nullptr;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = nullptr;
  59 // Address of instruction which causes APX specific SEGV
  60 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
  61 // Address of instruction after the one which causes APX specific SEGV
  62 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
  63 
  64 static BufferBlob* stub_blob;
  65 static const int stub_size = 2000;
  66 
  67 extern "C" {
  68   typedef void (*get_cpu_info_stub_t)(void*);
  69   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  70   typedef void (*clear_apx_test_state_t)(void);
  71 }
  72 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  73 static detect_virt_stub_t detect_virt_stub = nullptr;
  74 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  75 
  76 #ifdef _LP64
  77 
  78 bool VM_Version::supports_clflush() {
  79   // clflush should always be available on x86_64
  80   // if not we are in real trouble because we rely on it
  81   // to flush the code cache.
  82   // Unfortunately, Assembler::clflush is currently called as part
  83   // of generation of the code cache flush routine. This happens
  84   // under Universe::init before the processor features are set
  85   // up. Assembler::flush calls this routine to check that clflush
  86   // is allowed. So, we give the caller a free pass if Universe init
  87   // is still in progress.
  88   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  89   return true;
  90 }
  91 #endif
  92 
  93 #define CPUID_STANDARD_FN   0x0
  94 #define CPUID_STANDARD_FN_1 0x1
  95 #define CPUID_STANDARD_FN_4 0x4
  96 #define CPUID_STANDARD_FN_B 0xb
  97 
  98 #define CPUID_EXTENDED_FN   0x80000000
  99 #define CPUID_EXTENDED_FN_1 0x80000001
 100 #define CPUID_EXTENDED_FN_2 0x80000002
 101 #define CPUID_EXTENDED_FN_3 0x80000003
 102 #define CPUID_EXTENDED_FN_4 0x80000004
 103 #define CPUID_EXTENDED_FN_7 0x80000007
 104 #define CPUID_EXTENDED_FN_8 0x80000008
 105 
 106 class VM_Version_StubGenerator: public StubCodeGenerator {
 107  public:
 108 
 109   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 110 
 111 #if defined(_LP64)
 112   address clear_apx_test_state() {
 113 #   define __ _masm->
 114     address start = __ pc();
 115     // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
 116     // handling guarantees that preserved register values post signal handling were
 117     // re-instantiated by operating system and not because they were not modified externally.
 118 
 119     bool save_apx = UseAPX;
 120     VM_Version::set_apx_cpuFeatures();
 121     UseAPX = true;
 122     // EGPR state save/restoration.
 123     __ mov64(r16, 0L);
 124     __ mov64(r31, 0L);
 125     UseAPX = save_apx;
 126     VM_Version::clean_cpuFeatures();
 127     __ ret(0);
 128     return start;
 129   }
 130 #endif
 131 
 132   address generate_get_cpu_info() {
 133     // Flags to test CPU type.
 134     const uint32_t HS_EFL_AC = 0x40000;
 135     const uint32_t HS_EFL_ID = 0x200000;
 136     // Values for when we don't have a CPUID instruction.
 137     const int      CPU_FAMILY_SHIFT = 8;
 138     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 139     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 140     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 141 
 142     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 143     Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
 144     Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
 145     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 146 
 147     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 148 #   define __ _masm->
 149 
 150     address start = __ pc();
 151 
 152     //
 153     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 154     //
 155     // LP64: rcx and rdx are first and second argument registers on windows
 156 
 157     __ push(rbp);
 158 #ifdef _LP64
 159     __ mov(rbp, c_rarg0); // cpuid_info address
 160 #else
 161     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 162 #endif
 163     __ push(rbx);
 164     __ push(rsi);
 165     __ pushf();          // preserve rbx, and flags
 166     __ pop(rax);
 167     __ push(rax);
 168     __ mov(rcx, rax);
 169     //
 170     // if we are unable to change the AC flag, we have a 386
 171     //
 172     __ xorl(rax, HS_EFL_AC);
 173     __ push(rax);
 174     __ popf();
 175     __ pushf();
 176     __ pop(rax);
 177     __ cmpptr(rax, rcx);
 178     __ jccb(Assembler::notEqual, detect_486);
 179 
 180     __ movl(rax, CPU_FAMILY_386);
 181     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 182     __ jmp(done);
 183 
 184     //
 185     // If we are unable to change the ID flag, we have a 486 which does
 186     // not support the "cpuid" instruction.
 187     //
 188     __ bind(detect_486);
 189     __ mov(rax, rcx);
 190     __ xorl(rax, HS_EFL_ID);
 191     __ push(rax);
 192     __ popf();
 193     __ pushf();
 194     __ pop(rax);
 195     __ cmpptr(rcx, rax);
 196     __ jccb(Assembler::notEqual, detect_586);
 197 
 198     __ bind(cpu486);
 199     __ movl(rax, CPU_FAMILY_486);
 200     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 201     __ jmp(done);
 202 
 203     //
 204     // At this point, we have a chip which supports the "cpuid" instruction
 205     //
 206     __ bind(detect_586);
 207     __ xorl(rax, rax);
 208     __ cpuid();
 209     __ orl(rax, rax);
 210     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 211                                         // value of at least 1, we give up and
 212                                         // assume a 486
 213     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 214     __ movl(Address(rsi, 0), rax);
 215     __ movl(Address(rsi, 4), rbx);
 216     __ movl(Address(rsi, 8), rcx);
 217     __ movl(Address(rsi,12), rdx);
 218 
 219     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 220     __ jccb(Assembler::belowEqual, std_cpuid4);
 221 
 222     //
 223     // cpuid(0xB) Processor Topology
 224     //
 225     __ movl(rax, 0xb);
 226     __ xorl(rcx, rcx);   // Threads level
 227     __ cpuid();
 228 
 229     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 230     __ movl(Address(rsi, 0), rax);
 231     __ movl(Address(rsi, 4), rbx);
 232     __ movl(Address(rsi, 8), rcx);
 233     __ movl(Address(rsi,12), rdx);
 234 
 235     __ movl(rax, 0xb);
 236     __ movl(rcx, 1);     // Cores level
 237     __ cpuid();
 238     __ push(rax);
 239     __ andl(rax, 0x1f);  // Determine if valid topology level
 240     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 241     __ andl(rax, 0xffff);
 242     __ pop(rax);
 243     __ jccb(Assembler::equal, std_cpuid4);
 244 
 245     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 246     __ movl(Address(rsi, 0), rax);
 247     __ movl(Address(rsi, 4), rbx);
 248     __ movl(Address(rsi, 8), rcx);
 249     __ movl(Address(rsi,12), rdx);
 250 
 251     __ movl(rax, 0xb);
 252     __ movl(rcx, 2);     // Packages level
 253     __ cpuid();
 254     __ push(rax);
 255     __ andl(rax, 0x1f);  // Determine if valid topology level
 256     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 257     __ andl(rax, 0xffff);
 258     __ pop(rax);
 259     __ jccb(Assembler::equal, std_cpuid4);
 260 
 261     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 262     __ movl(Address(rsi, 0), rax);
 263     __ movl(Address(rsi, 4), rbx);
 264     __ movl(Address(rsi, 8), rcx);
 265     __ movl(Address(rsi,12), rdx);
 266 
 267     //
 268     // cpuid(0x4) Deterministic cache params
 269     //
 270     __ bind(std_cpuid4);
 271     __ movl(rax, 4);
 272     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 273     __ jccb(Assembler::greater, std_cpuid1);
 274 
 275     __ xorl(rcx, rcx);   // L1 cache
 276     __ cpuid();
 277     __ push(rax);
 278     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 279     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 280     __ pop(rax);
 281     __ jccb(Assembler::equal, std_cpuid1);
 282 
 283     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 284     __ movl(Address(rsi, 0), rax);
 285     __ movl(Address(rsi, 4), rbx);
 286     __ movl(Address(rsi, 8), rcx);
 287     __ movl(Address(rsi,12), rdx);
 288 
 289     //
 290     // Standard cpuid(0x1)
 291     //
 292     __ bind(std_cpuid1);
 293     __ movl(rax, 1);
 294     __ cpuid();
 295     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 296     __ movl(Address(rsi, 0), rax);
 297     __ movl(Address(rsi, 4), rbx);
 298     __ movl(Address(rsi, 8), rcx);
 299     __ movl(Address(rsi,12), rdx);
 300 
 301     //
 302     // Check if OS has enabled XGETBV instruction to access XCR0
 303     // (OSXSAVE feature flag) and CPU supports AVX
 304     //
 305     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 306     __ cmpl(rcx, 0x18000000);
 307     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 308 
 309     //
 310     // XCR0, XFEATURE_ENABLED_MASK register
 311     //
 312     __ xorl(rcx, rcx);   // zero for XCR0 register
 313     __ xgetbv();
 314     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 315     __ movl(Address(rsi, 0), rax);
 316     __ movl(Address(rsi, 4), rdx);
 317 
 318     //
 319     // cpuid(0x7) Structured Extended Features Enumeration Leaf.
 320     //
 321     __ bind(sef_cpuid);
 322     __ movl(rax, 7);
 323     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 324     __ jccb(Assembler::greater, ext_cpuid);
 325     // ECX = 0
 326     __ xorl(rcx, rcx);
 327     __ cpuid();
 328     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 329     __ movl(Address(rsi, 0), rax);
 330     __ movl(Address(rsi, 4), rbx);
 331     __ movl(Address(rsi, 8), rcx);
 332     __ movl(Address(rsi, 12), rdx);
 333 
 334     //
 335     // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
 336     //
 337     __ bind(sefsl1_cpuid);
 338     __ movl(rax, 7);
 339     __ movl(rcx, 1);
 340     __ cpuid();
 341     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 342     __ movl(Address(rsi, 0), rax);
 343     __ movl(Address(rsi, 4), rdx);
 344 
 345     //
 346     // Extended cpuid(0x80000000)
 347     //
 348     __ bind(ext_cpuid);
 349     __ movl(rax, 0x80000000);
 350     __ cpuid();
 351     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 352     __ jcc(Assembler::belowEqual, done);
 353     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 354     __ jcc(Assembler::belowEqual, ext_cpuid1);
 355     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 356     __ jccb(Assembler::belowEqual, ext_cpuid5);
 357     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 358     __ jccb(Assembler::belowEqual, ext_cpuid7);
 359     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 360     __ jccb(Assembler::belowEqual, ext_cpuid8);
 361     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 362     __ jccb(Assembler::below, ext_cpuid8);
 363     //
 364     // Extended cpuid(0x8000001E)
 365     //
 366     __ movl(rax, 0x8000001E);
 367     __ cpuid();
 368     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 369     __ movl(Address(rsi, 0), rax);
 370     __ movl(Address(rsi, 4), rbx);
 371     __ movl(Address(rsi, 8), rcx);
 372     __ movl(Address(rsi,12), rdx);
 373 
 374     //
 375     // Extended cpuid(0x80000008)
 376     //
 377     __ bind(ext_cpuid8);
 378     __ movl(rax, 0x80000008);
 379     __ cpuid();
 380     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 381     __ movl(Address(rsi, 0), rax);
 382     __ movl(Address(rsi, 4), rbx);
 383     __ movl(Address(rsi, 8), rcx);
 384     __ movl(Address(rsi,12), rdx);
 385 
 386     //
 387     // Extended cpuid(0x80000007)
 388     //
 389     __ bind(ext_cpuid7);
 390     __ movl(rax, 0x80000007);
 391     __ cpuid();
 392     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 393     __ movl(Address(rsi, 0), rax);
 394     __ movl(Address(rsi, 4), rbx);
 395     __ movl(Address(rsi, 8), rcx);
 396     __ movl(Address(rsi,12), rdx);
 397 
 398     //
 399     // Extended cpuid(0x80000005)
 400     //
 401     __ bind(ext_cpuid5);
 402     __ movl(rax, 0x80000005);
 403     __ cpuid();
 404     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 405     __ movl(Address(rsi, 0), rax);
 406     __ movl(Address(rsi, 4), rbx);
 407     __ movl(Address(rsi, 8), rcx);
 408     __ movl(Address(rsi,12), rdx);
 409 
 410     //
 411     // Extended cpuid(0x80000001)
 412     //
 413     __ bind(ext_cpuid1);
 414     __ movl(rax, 0x80000001);
 415     __ cpuid();
 416     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 417     __ movl(Address(rsi, 0), rax);
 418     __ movl(Address(rsi, 4), rbx);
 419     __ movl(Address(rsi, 8), rcx);
 420     __ movl(Address(rsi,12), rdx);
 421 
 422 #if defined(_LP64)
 423     //
 424     // Check if OS has enabled XGETBV instruction to access XCR0
 425     // (OSXSAVE feature flag) and CPU supports APX
 426     //
 427     // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
 428     // and XCRO[19] bit for OS support to save/restore extended GPR state.
 429     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 430     __ movl(rax, 0x200000);
 431     __ andl(rax, Address(rsi, 4));
 432     __ cmpl(rax, 0x200000);
 433     __ jcc(Assembler::notEqual, vector_save_restore);
 434     // check _cpuid_info.xem_xcr0_eax.bits.apx_f
 435     __ movl(rax, 0x80000);
 436     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
 437     __ cmpl(rax, 0x80000);
 438     __ jcc(Assembler::notEqual, vector_save_restore);
 439 
 440 #ifndef PRODUCT
 441     bool save_apx = UseAPX;
 442     VM_Version::set_apx_cpuFeatures();
 443     UseAPX = true;
 444     __ mov64(r16, VM_Version::egpr_test_value());
 445     __ mov64(r31, VM_Version::egpr_test_value());
 446     __ xorl(rsi, rsi);
 447     VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
 448     // Generate SEGV
 449     __ movl(rax, Address(rsi, 0));
 450 
 451     VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
 452     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
 453     __ movq(Address(rsi, 0), r16);
 454     __ movq(Address(rsi, 8), r31);
 455 
 456     UseAPX = save_apx;
 457 #endif
 458 #endif
 459     __ bind(vector_save_restore);
 460     //
 461     // Check if OS has enabled XGETBV instruction to access XCR0
 462     // (OSXSAVE feature flag) and CPU supports AVX
 463     //
 464     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 465     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 466     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 467     __ cmpl(rcx, 0x18000000);
 468     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 469 
 470     __ movl(rax, 0x6);
 471     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 472     __ cmpl(rax, 0x6);
 473     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 474 
 475     // we need to bridge farther than imm8, so we use this island as a thunk
 476     __ bind(done);
 477     __ jmp(wrapup);
 478 
 479     __ bind(start_simd_check);
 480     //
 481     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 482     // registers are not restored after a signal processing.
 483     // Generate SEGV here (reference through null)
 484     // and check upper YMM/ZMM bits after it.
 485     //
 486     int saved_useavx = UseAVX;
 487     int saved_usesse = UseSSE;
 488 
 489     // If UseAVX is uninitialized or is set by the user to include EVEX
 490     if (use_evex) {
 491       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 492       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 493       __ movl(rax, 0x10000);
 494       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 495       __ cmpl(rax, 0x10000);
 496       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 497       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 498       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 499       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 500       __ movl(rax, 0xE0);
 501       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 502       __ cmpl(rax, 0xE0);
 503       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 504 
 505       if (FLAG_IS_DEFAULT(UseAVX)) {
 506         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 507         __ movl(rax, Address(rsi, 0));
 508         __ cmpl(rax, 0x50654);              // If it is Skylake
 509         __ jcc(Assembler::equal, legacy_setup);
 510       }
 511       // EVEX setup: run in lowest evex mode
 512       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 513       UseAVX = 3;
 514       UseSSE = 2;
 515 #ifdef _WINDOWS
 516       // xmm5-xmm15 are not preserved by caller on windows
 517       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 518       __ subptr(rsp, 64);
 519       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 520 #ifdef _LP64
 521       __ subptr(rsp, 64);
 522       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 523       __ subptr(rsp, 64);
 524       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 525 #endif // _LP64
 526 #endif // _WINDOWS
 527 
 528       // load value into all 64 bytes of zmm7 register
 529       __ movl(rcx, VM_Version::ymm_test_value());
 530       __ movdl(xmm0, rcx);
 531       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 532       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 533 #ifdef _LP64
 534       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 535       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 536 #endif
 537       VM_Version::clean_cpuFeatures();
 538       __ jmp(save_restore_except);
 539     }
 540 
 541     __ bind(legacy_setup);
 542     // AVX setup
 543     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 544     UseAVX = 1;
 545     UseSSE = 2;
 546 #ifdef _WINDOWS
 547     __ subptr(rsp, 32);
 548     __ vmovdqu(Address(rsp, 0), xmm7);
 549 #ifdef _LP64
 550     __ subptr(rsp, 32);
 551     __ vmovdqu(Address(rsp, 0), xmm8);
 552     __ subptr(rsp, 32);
 553     __ vmovdqu(Address(rsp, 0), xmm15);
 554 #endif // _LP64
 555 #endif // _WINDOWS
 556 
 557     // load value into all 32 bytes of ymm7 register
 558     __ movl(rcx, VM_Version::ymm_test_value());
 559 
 560     __ movdl(xmm0, rcx);
 561     __ pshufd(xmm0, xmm0, 0x00);
 562     __ vinsertf128_high(xmm0, xmm0);
 563     __ vmovdqu(xmm7, xmm0);
 564 #ifdef _LP64
 565     __ vmovdqu(xmm8, xmm0);
 566     __ vmovdqu(xmm15, xmm0);
 567 #endif
 568     VM_Version::clean_cpuFeatures();
 569 
 570     __ bind(save_restore_except);
 571     __ xorl(rsi, rsi);
 572     VM_Version::set_cpuinfo_segv_addr(__ pc());
 573     // Generate SEGV
 574     __ movl(rax, Address(rsi, 0));
 575 
 576     VM_Version::set_cpuinfo_cont_addr(__ pc());
 577     // Returns here after signal. Save xmm0 to check it later.
 578 
 579     // If UseAVX is uninitialized or is set by the user to include EVEX
 580     if (use_evex) {
 581       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 582       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 583       __ movl(rax, 0x10000);
 584       __ andl(rax, Address(rsi, 4));
 585       __ cmpl(rax, 0x10000);
 586       __ jcc(Assembler::notEqual, legacy_save_restore);
 587       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 588       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 589       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 590       __ movl(rax, 0xE0);
 591       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 592       __ cmpl(rax, 0xE0);
 593       __ jcc(Assembler::notEqual, legacy_save_restore);
 594 
 595       if (FLAG_IS_DEFAULT(UseAVX)) {
 596         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 597         __ movl(rax, Address(rsi, 0));
 598         __ cmpl(rax, 0x50654);              // If it is Skylake
 599         __ jcc(Assembler::equal, legacy_save_restore);
 600       }
 601       // EVEX check: run in lowest evex mode
 602       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 603       UseAVX = 3;
 604       UseSSE = 2;
 605       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 606       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 607       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 608 #ifdef _LP64
 609       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 610       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 611 #endif
 612 
 613 #ifdef _WINDOWS
 614 #ifdef _LP64
 615       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 616       __ addptr(rsp, 64);
 617       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 618       __ addptr(rsp, 64);
 619 #endif // _LP64
 620       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 621       __ addptr(rsp, 64);
 622 #endif // _WINDOWS
 623       generate_vzeroupper(wrapup);
 624       VM_Version::clean_cpuFeatures();
 625       UseAVX = saved_useavx;
 626       UseSSE = saved_usesse;
 627       __ jmp(wrapup);
 628    }
 629 
 630     __ bind(legacy_save_restore);
 631     // AVX check
 632     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 633     UseAVX = 1;
 634     UseSSE = 2;
 635     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 636     __ vmovdqu(Address(rsi, 0), xmm0);
 637     __ vmovdqu(Address(rsi, 32), xmm7);
 638 #ifdef _LP64
 639     __ vmovdqu(Address(rsi, 64), xmm8);
 640     __ vmovdqu(Address(rsi, 96), xmm15);
 641 #endif
 642 
 643 #ifdef _WINDOWS
 644 #ifdef _LP64
 645     __ vmovdqu(xmm15, Address(rsp, 0));
 646     __ addptr(rsp, 32);
 647     __ vmovdqu(xmm8, Address(rsp, 0));
 648     __ addptr(rsp, 32);
 649 #endif // _LP64
 650     __ vmovdqu(xmm7, Address(rsp, 0));
 651     __ addptr(rsp, 32);
 652 #endif // _WINDOWS
 653 
 654     generate_vzeroupper(wrapup);
 655     VM_Version::clean_cpuFeatures();
 656     UseAVX = saved_useavx;
 657     UseSSE = saved_usesse;
 658 
 659     __ bind(wrapup);
 660     __ popf();
 661     __ pop(rsi);
 662     __ pop(rbx);
 663     __ pop(rbp);
 664     __ ret(0);
 665 
 666 #   undef __
 667 
 668     return start;
 669   };
 670   void generate_vzeroupper(Label& L_wrapup) {
 671 #   define __ _masm->
 672     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 673     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 674     __ jcc(Assembler::notEqual, L_wrapup);
 675     __ movl(rcx, 0x0FFF0FF0);
 676     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 677     __ andl(rcx, Address(rsi, 0));
 678     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 679     __ jcc(Assembler::equal, L_wrapup);
 680     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 681     __ jcc(Assembler::equal, L_wrapup);
 682     // vzeroupper() will use a pre-computed instruction sequence that we
 683     // can't compute until after we've determined CPU capabilities. Use
 684     // uncached variant here directly to be able to bootstrap correctly
 685     __ vzeroupper_uncached();
 686 #   undef __
 687   }
 688   address generate_detect_virt() {
 689     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 690 #   define __ _masm->
 691 
 692     address start = __ pc();
 693 
 694     // Evacuate callee-saved registers
 695     __ push(rbp);
 696     __ push(rbx);
 697     __ push(rsi); // for Windows
 698 
 699 #ifdef _LP64
 700     __ mov(rax, c_rarg0); // CPUID leaf
 701     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 702 #else
 703     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 704     __ movptr(rsi, Address(rsp, 20)); // register array address
 705 #endif
 706 
 707     __ cpuid();
 708 
 709     // Store result to register array
 710     __ movl(Address(rsi,  0), rax);
 711     __ movl(Address(rsi,  4), rbx);
 712     __ movl(Address(rsi,  8), rcx);
 713     __ movl(Address(rsi, 12), rdx);
 714 
 715     // Epilogue
 716     __ pop(rsi);
 717     __ pop(rbx);
 718     __ pop(rbp);
 719     __ ret(0);
 720 
 721 #   undef __
 722 
 723     return start;
 724   };
 725 
 726 
 727   address generate_getCPUIDBrandString(void) {
 728     // Flags to test CPU type.
 729     const uint32_t HS_EFL_AC           = 0x40000;
 730     const uint32_t HS_EFL_ID           = 0x200000;
 731     // Values for when we don't have a CPUID instruction.
 732     const int      CPU_FAMILY_SHIFT = 8;
 733     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 734     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 735 
 736     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 737 
 738     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 739 #   define __ _masm->
 740 
 741     address start = __ pc();
 742 
 743     //
 744     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 745     //
 746     // LP64: rcx and rdx are first and second argument registers on windows
 747 
 748     __ push(rbp);
 749 #ifdef _LP64
 750     __ mov(rbp, c_rarg0); // cpuid_info address
 751 #else
 752     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 753 #endif
 754     __ push(rbx);
 755     __ push(rsi);
 756     __ pushf();          // preserve rbx, and flags
 757     __ pop(rax);
 758     __ push(rax);
 759     __ mov(rcx, rax);
 760     //
 761     // if we are unable to change the AC flag, we have a 386
 762     //
 763     __ xorl(rax, HS_EFL_AC);
 764     __ push(rax);
 765     __ popf();
 766     __ pushf();
 767     __ pop(rax);
 768     __ cmpptr(rax, rcx);
 769     __ jccb(Assembler::notEqual, detect_486);
 770 
 771     __ movl(rax, CPU_FAMILY_386);
 772     __ jmp(done);
 773 
 774     //
 775     // If we are unable to change the ID flag, we have a 486 which does
 776     // not support the "cpuid" instruction.
 777     //
 778     __ bind(detect_486);
 779     __ mov(rax, rcx);
 780     __ xorl(rax, HS_EFL_ID);
 781     __ push(rax);
 782     __ popf();
 783     __ pushf();
 784     __ pop(rax);
 785     __ cmpptr(rcx, rax);
 786     __ jccb(Assembler::notEqual, detect_586);
 787 
 788     __ bind(cpu486);
 789     __ movl(rax, CPU_FAMILY_486);
 790     __ jmp(done);
 791 
 792     //
 793     // At this point, we have a chip which supports the "cpuid" instruction
 794     //
 795     __ bind(detect_586);
 796     __ xorl(rax, rax);
 797     __ cpuid();
 798     __ orl(rax, rax);
 799     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 800                                         // value of at least 1, we give up and
 801                                         // assume a 486
 802 
 803     //
 804     // Extended cpuid(0x80000000) for processor brand string detection
 805     //
 806     __ bind(ext_cpuid);
 807     __ movl(rax, CPUID_EXTENDED_FN);
 808     __ cpuid();
 809     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 810     __ jcc(Assembler::below, done);
 811 
 812     //
 813     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 814     //
 815     __ movl(rax, CPUID_EXTENDED_FN_2);
 816     __ cpuid();
 817     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 818     __ movl(Address(rsi, 0), rax);
 819     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 820     __ movl(Address(rsi, 0), rbx);
 821     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 822     __ movl(Address(rsi, 0), rcx);
 823     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 824     __ movl(Address(rsi,0), rdx);
 825 
 826     //
 827     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 828     //
 829     __ movl(rax, CPUID_EXTENDED_FN_3);
 830     __ cpuid();
 831     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 832     __ movl(Address(rsi, 0), rax);
 833     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 834     __ movl(Address(rsi, 0), rbx);
 835     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 836     __ movl(Address(rsi, 0), rcx);
 837     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 838     __ movl(Address(rsi,0), rdx);
 839 
 840     //
 841     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 842     //
 843     __ movl(rax, CPUID_EXTENDED_FN_4);
 844     __ cpuid();
 845     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 846     __ movl(Address(rsi, 0), rax);
 847     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 848     __ movl(Address(rsi, 0), rbx);
 849     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 850     __ movl(Address(rsi, 0), rcx);
 851     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 852     __ movl(Address(rsi,0), rdx);
 853 
 854     //
 855     // return
 856     //
 857     __ bind(done);
 858     __ popf();
 859     __ pop(rsi);
 860     __ pop(rbx);
 861     __ pop(rbp);
 862     __ ret(0);
 863 
 864 #   undef __
 865 
 866     return start;
 867   };
 868 };
 869 
 870 void VM_Version::get_processor_features() {
 871 
 872   _cpu = 4; // 486 by default
 873   _model = 0;
 874   _stepping = 0;
 875   _features = 0;
 876   _logical_processors_per_package = 1;
 877   // i486 internal cache is both I&D and has a 16-byte line size
 878   _L1_data_cache_line_size = 16;
 879 
 880   // Get raw processor info
 881 
 882   get_cpu_info_stub(&_cpuid_info);
 883 
 884   assert_is_initialized();
 885   _cpu = extended_cpu_family();
 886   _model = extended_cpu_model();
 887   _stepping = cpu_stepping();
 888 
 889   if (cpu_family() > 4) { // it supports CPUID
 890     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 891     _cpu_features = _features;   // Preserve features
 892     // Logical processors are only available on P4s and above,
 893     // and only if hyperthreading is available.
 894     _logical_processors_per_package = logical_processor_count();
 895     _L1_data_cache_line_size = L1_line_size();
 896   }
 897 
 898   // xchg and xadd instructions
 899   _supports_atomic_getset4 = true;
 900   _supports_atomic_getadd4 = true;
 901   LP64_ONLY(_supports_atomic_getset8 = true);
 902   LP64_ONLY(_supports_atomic_getadd8 = true);
 903 
 904 #ifdef _LP64
 905   // OS should support SSE for x64 and hardware should support at least SSE2.
 906   if (!VM_Version::supports_sse2()) {
 907     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 908   }
 909   // in 64 bit the use of SSE2 is the minimum
 910   if (UseSSE < 2) UseSSE = 2;
 911 #endif
 912 
 913 #ifdef AMD64
 914   // flush_icache_stub have to be generated first.
 915   // That is why Icache line size is hard coded in ICache class,
 916   // see icache_x86.hpp. It is also the reason why we can't use
 917   // clflush instruction in 32-bit VM since it could be running
 918   // on CPU which does not support it.
 919   //
 920   // The only thing we can do is to verify that flushed
 921   // ICache::line_size has correct value.
 922   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 923   // clflush_size is size in quadwords (8 bytes).
 924   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 925 #endif
 926 
 927 #ifdef _LP64
 928   // assigning this field effectively enables Unsafe.writebackMemory()
 929   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 930   // that is only implemented on x86_64 and only if the OS plays ball
 931   if (os::supports_map_sync()) {
 932     // publish data cache line flush size to generic field, otherwise
 933     // let if default to zero thereby disabling writeback
 934     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 935   }
 936 #endif
 937 
 938   // Check if processor has Intel Ecore
 939   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 940     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
 941     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 942   }
 943 
 944   if (UseSSE < 4) {
 945     _features &= ~CPU_SSE4_1;
 946     _features &= ~CPU_SSE4_2;
 947   }
 948 
 949   if (UseSSE < 3) {
 950     _features &= ~CPU_SSE3;
 951     _features &= ~CPU_SSSE3;
 952     _features &= ~CPU_SSE4A;
 953   }
 954 
 955   if (UseSSE < 2)
 956     _features &= ~CPU_SSE2;
 957 
 958   if (UseSSE < 1)
 959     _features &= ~CPU_SSE;
 960 
 961   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 962   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 963     UseAVX = 0;
 964   }
 965 
 966   // UseSSE is set to the smaller of what hardware supports and what
 967   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 968   // older Pentiums which do not support it.
 969   int use_sse_limit = 0;
 970   if (UseSSE > 0) {
 971     if (UseSSE > 3 && supports_sse4_1()) {
 972       use_sse_limit = 4;
 973     } else if (UseSSE > 2 && supports_sse3()) {
 974       use_sse_limit = 3;
 975     } else if (UseSSE > 1 && supports_sse2()) {
 976       use_sse_limit = 2;
 977     } else if (UseSSE > 0 && supports_sse()) {
 978       use_sse_limit = 1;
 979     } else {
 980       use_sse_limit = 0;
 981     }
 982   }
 983   if (FLAG_IS_DEFAULT(UseSSE)) {
 984     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 985   } else if (UseSSE > use_sse_limit) {
 986     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 987     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 988   }
 989 
 990   // first try initial setting and detect what we can support
 991   int use_avx_limit = 0;
 992   if (UseAVX > 0) {
 993     if (UseSSE < 4) {
 994       // Don't use AVX if SSE is unavailable or has been disabled.
 995       use_avx_limit = 0;
 996     } else if (UseAVX > 2 && supports_evex()) {
 997       use_avx_limit = 3;
 998     } else if (UseAVX > 1 && supports_avx2()) {
 999       use_avx_limit = 2;
1000     } else if (UseAVX > 0 && supports_avx()) {
1001       use_avx_limit = 1;
1002     } else {
1003       use_avx_limit = 0;
1004     }
1005   }
1006   if (FLAG_IS_DEFAULT(UseAVX)) {
1007     // Don't use AVX-512 on older Skylakes unless explicitly requested.
1008     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1009       FLAG_SET_DEFAULT(UseAVX, 2);
1010     } else {
1011       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1012     }
1013   }
1014 
1015   if (UseAVX > use_avx_limit) {
1016     if (UseSSE < 4) {
1017       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1018     } else {
1019       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1020     }
1021     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1022   }
1023 
1024   if (UseAVX < 3) {
1025     _features &= ~CPU_AVX512F;
1026     _features &= ~CPU_AVX512DQ;
1027     _features &= ~CPU_AVX512CD;
1028     _features &= ~CPU_AVX512BW;
1029     _features &= ~CPU_AVX512VL;
1030     _features &= ~CPU_AVX512_VPOPCNTDQ;
1031     _features &= ~CPU_AVX512_VPCLMULQDQ;
1032     _features &= ~CPU_AVX512_VAES;
1033     _features &= ~CPU_AVX512_VNNI;
1034     _features &= ~CPU_AVX512_VBMI;
1035     _features &= ~CPU_AVX512_VBMI2;
1036     _features &= ~CPU_AVX512_BITALG;
1037     _features &= ~CPU_AVX512_IFMA;
1038     _features &= ~CPU_APX_F;
1039   }
1040 
1041   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1042   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1043   if (UseAPX && !apx_supported) {
1044     warning("UseAPX is not supported on this CPU, setting it to false");
1045     FLAG_SET_DEFAULT(UseAPX, false);
1046   } else if (FLAG_IS_DEFAULT(UseAPX)) {
1047     FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
1048   }
1049 
1050   if (!UseAPX) {
1051     _features &= ~CPU_APX_F;
1052   }
1053 
1054   if (UseAVX < 2) {
1055     _features &= ~CPU_AVX2;
1056     _features &= ~CPU_AVX_IFMA;
1057   }
1058 
1059   if (UseAVX < 1) {
1060     _features &= ~CPU_AVX;
1061     _features &= ~CPU_VZEROUPPER;
1062     _features &= ~CPU_F16C;
1063     _features &= ~CPU_SHA512;
1064   }
1065 
1066   if (logical_processors_per_package() == 1) {
1067     // HT processor could be installed on a system which doesn't support HT.
1068     _features &= ~CPU_HT;
1069   }
1070 
1071   if (is_intel()) { // Intel cpus specific settings
1072     if (is_knights_family()) {
1073       _features &= ~CPU_VZEROUPPER;
1074       _features &= ~CPU_AVX512BW;
1075       _features &= ~CPU_AVX512VL;
1076       _features &= ~CPU_AVX512DQ;
1077       _features &= ~CPU_AVX512_VNNI;
1078       _features &= ~CPU_AVX512_VAES;
1079       _features &= ~CPU_AVX512_VPOPCNTDQ;
1080       _features &= ~CPU_AVX512_VPCLMULQDQ;
1081       _features &= ~CPU_AVX512_VBMI;
1082       _features &= ~CPU_AVX512_VBMI2;
1083       _features &= ~CPU_CLWB;
1084       _features &= ~CPU_FLUSHOPT;
1085       _features &= ~CPU_GFNI;
1086       _features &= ~CPU_AVX512_BITALG;
1087       _features &= ~CPU_AVX512_IFMA;
1088       _features &= ~CPU_AVX_IFMA;
1089     }
1090   }
1091 
1092   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1093     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1094   } else {
1095     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1096   }
1097 
1098   char buf[1024];
1099   int res = jio_snprintf(
1100               buf, sizeof(buf),
1101               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1102               cores_per_cpu(), threads_per_core(),
1103               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1104   assert(res > 0, "not enough temporary space allocated");
1105   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1106 
1107   _features_string = os::strdup(buf);
1108 
1109   // Use AES instructions if available.
1110   if (supports_aes()) {
1111     if (FLAG_IS_DEFAULT(UseAES)) {
1112       FLAG_SET_DEFAULT(UseAES, true);
1113     }
1114     if (!UseAES) {
1115       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1116         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1117       }
1118       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1119     } else {
1120       if (UseSSE > 2) {
1121         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1122           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1123         }
1124       } else {
1125         // The AES intrinsic stubs require AES instruction support (of course)
1126         // but also require sse3 mode or higher for instructions it use.
1127         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1128           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1129         }
1130         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1131       }
1132 
1133       // --AES-CTR begins--
1134       if (!UseAESIntrinsics) {
1135         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1136           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1137           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1138         }
1139       } else {
1140         if (supports_sse4_1()) {
1141           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1142             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1143           }
1144         } else {
1145            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1146            // but also require sse4.1 mode or higher for instructions it use.
1147           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1148              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1149            }
1150            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1151         }
1152       }
1153       // --AES-CTR ends--
1154     }
1155   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1156     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1157       warning("AES instructions are not available on this CPU");
1158       FLAG_SET_DEFAULT(UseAES, false);
1159     }
1160     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1161       warning("AES intrinsics are not available on this CPU");
1162       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1163     }
1164     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1165       warning("AES-CTR intrinsics are not available on this CPU");
1166       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1167     }
1168   }
1169 
1170   // Use CLMUL instructions if available.
1171   if (supports_clmul()) {
1172     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1173       UseCLMUL = true;
1174     }
1175   } else if (UseCLMUL) {
1176     if (!FLAG_IS_DEFAULT(UseCLMUL))
1177       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1178     FLAG_SET_DEFAULT(UseCLMUL, false);
1179   }
1180 
1181   if (UseCLMUL && (UseSSE > 2)) {
1182     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1183       UseCRC32Intrinsics = true;
1184     }
1185   } else if (UseCRC32Intrinsics) {
1186     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1187       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1188     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1189   }
1190 
1191 #ifdef _LP64
1192   if (supports_avx2()) {
1193     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1194       UseAdler32Intrinsics = true;
1195     }
1196   } else if (UseAdler32Intrinsics) {
1197     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1198       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1199     }
1200     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1201   }
1202 #else
1203   if (UseAdler32Intrinsics) {
1204     warning("Adler32Intrinsics not available on this CPU.");
1205     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1206   }
1207 #endif
1208 
1209   if (supports_sse4_2() && supports_clmul()) {
1210     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1211       UseCRC32CIntrinsics = true;
1212     }
1213   } else if (UseCRC32CIntrinsics) {
1214     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1215       warning("CRC32C intrinsics are not available on this CPU");
1216     }
1217     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1218   }
1219 
1220   // GHASH/GCM intrinsics
1221   if (UseCLMUL && (UseSSE > 2)) {
1222     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1223       UseGHASHIntrinsics = true;
1224     }
1225   } else if (UseGHASHIntrinsics) {
1226     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1227       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1228     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1229   }
1230 
1231 #ifdef _LP64
1232   // ChaCha20 Intrinsics
1233   // As long as the system supports AVX as a baseline we can do a
1234   // SIMD-enabled block function.  StubGenerator makes the determination
1235   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1236   // version.
1237   if (UseAVX >= 1) {
1238       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1239           UseChaCha20Intrinsics = true;
1240       }
1241   } else if (UseChaCha20Intrinsics) {
1242       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1243           warning("ChaCha20 intrinsic requires AVX instructions");
1244       }
1245       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1246   }
1247 #else
1248   // No support currently for ChaCha20 intrinsics on 32-bit platforms
1249   if (UseChaCha20Intrinsics) {
1250       warning("ChaCha20 intrinsics are not available on this CPU.");
1251       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1252   }
1253 #endif // _LP64
1254 
1255   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1256   if (UseAVX >= 2) {
1257     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1258       UseBASE64Intrinsics = true;
1259     }
1260   } else if (UseBASE64Intrinsics) {
1261      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1262       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1263     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1264   }
1265 
1266   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1267     if (FLAG_IS_DEFAULT(UseFMA)) {
1268       UseFMA = true;
1269     }
1270   } else if (UseFMA) {
1271     warning("FMA instructions are not available on this CPU");
1272     FLAG_SET_DEFAULT(UseFMA, false);
1273   }
1274 
1275   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1276     UseMD5Intrinsics = true;
1277   }
1278 
1279   if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
1280     if (FLAG_IS_DEFAULT(UseSHA)) {
1281       UseSHA = true;
1282     }
1283   } else if (UseSHA) {
1284     warning("SHA instructions are not available on this CPU");
1285     FLAG_SET_DEFAULT(UseSHA, false);
1286   }
1287 
1288   if (supports_sha() && supports_sse4_1() && UseSHA) {
1289     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1290       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1291     }
1292   } else if (UseSHA1Intrinsics) {
1293     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1294     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1295   }
1296 
1297   if (supports_sse4_1() && UseSHA) {
1298     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1299       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1300     }
1301   } else if (UseSHA256Intrinsics) {
1302     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1303     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1304   }
1305 
1306 #ifdef _LP64
1307   // These are only supported on 64-bit
1308   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1309     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1310       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1311     }
1312   } else
1313 #endif
1314   if (UseSHA512Intrinsics) {
1315     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1316     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1317   }
1318 
1319 #ifdef _LP64
1320   if (supports_evex() && supports_avx512bw()) {
1321       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1322           UseSHA3Intrinsics = true;
1323       }
1324   } else
1325 #endif
1326    if (UseSHA3Intrinsics) {
1327       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1328       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1329   }
1330 
1331   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1332     FLAG_SET_DEFAULT(UseSHA, false);
1333   }
1334 
1335 #ifdef COMPILER2
1336   if (UseFPUForSpilling) {
1337     if (UseSSE < 2) {
1338       // Only supported with SSE2+
1339       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1340     }
1341   }
1342 #endif
1343 
1344 #if COMPILER2_OR_JVMCI
1345   int max_vector_size = 0;
1346   if (UseSSE < 2) {
1347     // Vectors (in XMM) are only supported with SSE2+
1348     // SSE is always 2 on x64.
1349     max_vector_size = 0;
1350   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1351     // 16 byte vectors (in XMM) are supported with SSE2+
1352     max_vector_size = 16;
1353   } else if (UseAVX == 1 || UseAVX == 2) {
1354     // 32 bytes vectors (in YMM) are only supported with AVX+
1355     max_vector_size = 32;
1356   } else if (UseAVX > 2) {
1357     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1358     max_vector_size = 64;
1359   }
1360 
1361 #ifdef _LP64
1362   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1363 #else
1364   int min_vector_size = 0;
1365 #endif
1366 
1367   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1368     if (MaxVectorSize < min_vector_size) {
1369       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1370       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1371     }
1372     if (MaxVectorSize > max_vector_size) {
1373       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1374       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1375     }
1376     if (!is_power_of_2(MaxVectorSize)) {
1377       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1378       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1379     }
1380   } else {
1381     // If default, use highest supported configuration
1382     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1383   }
1384 
1385 #if defined(COMPILER2) && defined(ASSERT)
1386   if (MaxVectorSize > 0) {
1387     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1388       tty->print_cr("State of YMM registers after signal handle:");
1389       int nreg = 2 LP64_ONLY(+2);
1390       const char* ymm_name[4] = {"0", "7", "8", "15"};
1391       for (int i = 0; i < nreg; i++) {
1392         tty->print("YMM%s:", ymm_name[i]);
1393         for (int j = 7; j >=0; j--) {
1394           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1395         }
1396         tty->cr();
1397       }
1398     }
1399   }
1400 #endif // COMPILER2 && ASSERT
1401 
1402 #ifdef _LP64
1403   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1404     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1405       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1406     }
1407   } else
1408 #endif
1409   if (UsePoly1305Intrinsics) {
1410     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1411     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1412   }
1413 
1414 #ifdef _LP64
1415   if (supports_avx512ifma() && supports_avx512vlbw()) {
1416     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1417       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1418     }
1419   } else
1420 #endif
1421   if (UseIntPolyIntrinsics) {
1422     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1423     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1424   }
1425 
1426 #ifdef _LP64
1427   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1428     UseMultiplyToLenIntrinsic = true;
1429   }
1430   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1431     UseSquareToLenIntrinsic = true;
1432   }
1433   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1434     UseMulAddIntrinsic = true;
1435   }
1436   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1437     UseMontgomeryMultiplyIntrinsic = true;
1438   }
1439   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1440     UseMontgomerySquareIntrinsic = true;
1441   }
1442 #else
1443   if (UseMultiplyToLenIntrinsic) {
1444     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1445       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1446     }
1447     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1448   }
1449   if (UseMontgomeryMultiplyIntrinsic) {
1450     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1451       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1452     }
1453     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1454   }
1455   if (UseMontgomerySquareIntrinsic) {
1456     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1457       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1458     }
1459     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1460   }
1461   if (UseSquareToLenIntrinsic) {
1462     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1463       warning("squareToLen intrinsic is not available in 32-bit VM");
1464     }
1465     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1466   }
1467   if (UseMulAddIntrinsic) {
1468     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1469       warning("mulAdd intrinsic is not available in 32-bit VM");
1470     }
1471     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1472   }
1473 #endif // _LP64
1474 #endif // COMPILER2_OR_JVMCI
1475 
1476   // On new cpus instructions which update whole XMM register should be used
1477   // to prevent partial register stall due to dependencies on high half.
1478   //
1479   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1480   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1481   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1482   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1483 
1484 
1485   if (is_zx()) { // ZX cpus specific settings
1486     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1487       UseStoreImmI16 = false; // don't use it on ZX cpus
1488     }
1489     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1490       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1491         // Use it on all ZX cpus
1492         UseAddressNop = true;
1493       }
1494     }
1495     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1496       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1497     }
1498     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1499       if (supports_sse3()) {
1500         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1501       } else {
1502         UseXmmRegToRegMoveAll = false;
1503       }
1504     }
1505     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1506 #ifdef COMPILER2
1507       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1508         // For new ZX cpus do the next optimization:
1509         // don't align the beginning of a loop if there are enough instructions
1510         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1511         // in current fetch line (OptoLoopAlignment) or the padding
1512         // is big (> MaxLoopPad).
1513         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1514         // generated NOP instructions. 11 is the largest size of one
1515         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1516         MaxLoopPad = 11;
1517       }
1518 #endif // COMPILER2
1519       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1520         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1521       }
1522       if (supports_sse4_2()) { // new ZX cpus
1523         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1524           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1525         }
1526       }
1527       if (supports_sse4_2()) {
1528         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1529           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1530         }
1531       } else {
1532         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1533           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1534         }
1535         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1536       }
1537     }
1538 
1539     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1540       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1541     }
1542   }
1543 
1544   if (is_amd_family()) { // AMD cpus specific settings
1545     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1546       // Use it on new AMD cpus starting from Opteron.
1547       UseAddressNop = true;
1548     }
1549     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1550       // Use it on new AMD cpus starting from Opteron.
1551       UseNewLongLShift = true;
1552     }
1553     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1554       if (supports_sse4a()) {
1555         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1556       } else {
1557         UseXmmLoadAndClearUpper = false;
1558       }
1559     }
1560     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1561       if (supports_sse4a()) {
1562         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1563       } else {
1564         UseXmmRegToRegMoveAll = false;
1565       }
1566     }
1567     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1568       if (supports_sse4a()) {
1569         UseXmmI2F = true;
1570       } else {
1571         UseXmmI2F = false;
1572       }
1573     }
1574     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1575       if (supports_sse4a()) {
1576         UseXmmI2D = true;
1577       } else {
1578         UseXmmI2D = false;
1579       }
1580     }
1581     if (supports_sse4_2()) {
1582       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1583         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1584       }
1585     } else {
1586       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1587         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1588       }
1589       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1590     }
1591 
1592     // some defaults for AMD family 15h
1593     if (cpu_family() == 0x15) {
1594       // On family 15h processors default is no sw prefetch
1595       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1596         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1597       }
1598       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1599       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1600         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1601       }
1602       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1603       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1604         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1605       }
1606       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1607         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1608       }
1609     }
1610 
1611 #ifdef COMPILER2
1612     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1613       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1614       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1615     }
1616 #endif // COMPILER2
1617 
1618     // Some defaults for AMD family >= 17h && Hygon family 18h
1619     if (cpu_family() >= 0x17) {
1620       // On family >=17h processors use XMM and UnalignedLoadStores
1621       // for Array Copy
1622       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1623         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1624       }
1625       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1626         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1627       }
1628 #ifdef COMPILER2
1629       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1630         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1631       }
1632 #endif
1633     }
1634   }
1635 
1636   if (is_intel()) { // Intel cpus specific settings
1637     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1638       UseStoreImmI16 = false; // don't use it on Intel cpus
1639     }
1640     if (cpu_family() == 6 || cpu_family() == 15) {
1641       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1642         // Use it on all Intel cpus starting from PentiumPro
1643         UseAddressNop = true;
1644       }
1645     }
1646     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1647       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1648     }
1649     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1650       if (supports_sse3()) {
1651         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1652       } else {
1653         UseXmmRegToRegMoveAll = false;
1654       }
1655     }
1656     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1657 #ifdef COMPILER2
1658       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1659         // For new Intel cpus do the next optimization:
1660         // don't align the beginning of a loop if there are enough instructions
1661         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1662         // in current fetch line (OptoLoopAlignment) or the padding
1663         // is big (> MaxLoopPad).
1664         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1665         // generated NOP instructions. 11 is the largest size of one
1666         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1667         MaxLoopPad = 11;
1668       }
1669 #endif // COMPILER2
1670 
1671       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1672         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1673       }
1674       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1675         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1676           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1677         }
1678       }
1679       if (supports_sse4_2()) {
1680         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1681           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1682         }
1683       } else {
1684         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1685           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1686         }
1687         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1688       }
1689     }
1690     if (is_atom_family() || is_knights_family()) {
1691 #ifdef COMPILER2
1692       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1693         OptoScheduling = true;
1694       }
1695 #endif
1696       if (supports_sse4_2()) { // Silvermont
1697         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1698           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1699         }
1700       }
1701       if (FLAG_IS_DEFAULT(UseIncDec)) {
1702         FLAG_SET_DEFAULT(UseIncDec, false);
1703       }
1704     }
1705     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1706       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1707     }
1708 #ifdef COMPILER2
1709     if (UseAVX > 2) {
1710       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1711           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1712            ArrayOperationPartialInlineSize != 0 &&
1713            ArrayOperationPartialInlineSize != 16 &&
1714            ArrayOperationPartialInlineSize != 32 &&
1715            ArrayOperationPartialInlineSize != 64)) {
1716         int inline_size = 0;
1717         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1718           inline_size = 64;
1719         } else if (MaxVectorSize >= 32) {
1720           inline_size = 32;
1721         } else if (MaxVectorSize >= 16) {
1722           inline_size = 16;
1723         }
1724         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1725           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1726         }
1727         ArrayOperationPartialInlineSize = inline_size;
1728       }
1729 
1730       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1731         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1732         if (ArrayOperationPartialInlineSize) {
1733           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1734         } else {
1735           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1736         }
1737       }
1738     }
1739 #endif
1740   }
1741 
1742 #ifdef COMPILER2
1743   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1744     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1745       OptimizeFill = false;
1746     }
1747   }
1748 #endif
1749 
1750 #ifdef _LP64
1751   if (UseSSE42Intrinsics) {
1752     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1753       UseVectorizedMismatchIntrinsic = true;
1754     }
1755   } else if (UseVectorizedMismatchIntrinsic) {
1756     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1757       warning("vectorizedMismatch intrinsics are not available on this CPU");
1758     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1759   }
1760   if (UseAVX >= 2) {
1761     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1762   } else if (UseVectorizedHashCodeIntrinsic) {
1763     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1764       warning("vectorizedHashCode intrinsics are not available on this CPU");
1765     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1766   }
1767 #else
1768   if (UseVectorizedMismatchIntrinsic) {
1769     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1770       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1771     }
1772     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1773   }
1774   if (UseVectorizedHashCodeIntrinsic) {
1775     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1776       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1777     }
1778     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1779   }
1780 #endif // _LP64
1781 
1782   // Use count leading zeros count instruction if available.
1783   if (supports_lzcnt()) {
1784     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1785       UseCountLeadingZerosInstruction = true;
1786     }
1787    } else if (UseCountLeadingZerosInstruction) {
1788     warning("lzcnt instruction is not available on this CPU");
1789     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1790   }
1791 
1792   // Use count trailing zeros instruction if available
1793   if (supports_bmi1()) {
1794     // tzcnt does not require VEX prefix
1795     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1796       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1797         // Don't use tzcnt if BMI1 is switched off on command line.
1798         UseCountTrailingZerosInstruction = false;
1799       } else {
1800         UseCountTrailingZerosInstruction = true;
1801       }
1802     }
1803   } else if (UseCountTrailingZerosInstruction) {
1804     warning("tzcnt instruction is not available on this CPU");
1805     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1806   }
1807 
1808   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1809   // VEX prefix is generated only when AVX > 0.
1810   if (supports_bmi1() && supports_avx()) {
1811     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1812       UseBMI1Instructions = true;
1813     }
1814   } else if (UseBMI1Instructions) {
1815     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1816     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1817   }
1818 
1819   if (supports_bmi2() && supports_avx()) {
1820     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1821       UseBMI2Instructions = true;
1822     }
1823   } else if (UseBMI2Instructions) {
1824     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1825     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1826   }
1827 
1828   // Use population count instruction if available.
1829   if (supports_popcnt()) {
1830     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1831       UsePopCountInstruction = true;
1832     }
1833   } else if (UsePopCountInstruction) {
1834     warning("POPCNT instruction is not available on this CPU");
1835     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1836   }
1837 
1838   // Use fast-string operations if available.
1839   if (supports_erms()) {
1840     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1841       UseFastStosb = true;
1842     }
1843   } else if (UseFastStosb) {
1844     warning("fast-string operations are not available on this CPU");
1845     FLAG_SET_DEFAULT(UseFastStosb, false);
1846   }
1847 
1848   // For AMD Processors use XMM/YMM MOVDQU instructions
1849   // for Object Initialization as default
1850   if (is_amd() && cpu_family() >= 0x19) {
1851     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1852       UseFastStosb = false;
1853     }
1854   }
1855 
1856 #ifdef COMPILER2
1857   if (is_intel() && MaxVectorSize > 16) {
1858     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1859       UseFastStosb = false;
1860     }
1861   }
1862 #endif
1863 
1864   // Use XMM/YMM MOVDQU instruction for Object Initialization
1865   if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1866     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1867       UseXMMForObjInit = true;
1868     }
1869   } else if (UseXMMForObjInit) {
1870     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1871     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1872   }
1873 
1874 #ifdef COMPILER2
1875   if (FLAG_IS_DEFAULT(AlignVector)) {
1876     // Modern processors allow misaligned memory operations for vectors.
1877     AlignVector = !UseUnalignedLoadStores;
1878   }
1879 #endif // COMPILER2
1880 
1881   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1882     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1883       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1884     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1885       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1886     }
1887   }
1888 
1889   // Allocation prefetch settings
1890   int cache_line_size = checked_cast<int>(prefetch_data_size());
1891   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1892       (cache_line_size > AllocatePrefetchStepSize)) {
1893     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1894   }
1895 
1896   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1897     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1898     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1899       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1900     }
1901     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1902   }
1903 
1904   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1905     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1906     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1907   }
1908 
1909   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1910     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1911         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1912       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1913     }
1914 #ifdef COMPILER2
1915     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1916       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1917     }
1918 #endif
1919   }
1920 
1921   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1922 #ifdef COMPILER2
1923     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1924       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1925     }
1926 #endif
1927   }
1928 
1929 #ifdef _LP64
1930   // Prefetch settings
1931 
1932   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1933   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1934   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1935   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1936 
1937   // gc copy/scan is disabled if prefetchw isn't supported, because
1938   // Prefetch::write emits an inlined prefetchw on Linux.
1939   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1940   // The used prefetcht0 instruction works for both amd64 and em64t.
1941 
1942   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1943     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1944   }
1945   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1946     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1947   }
1948 #endif
1949 
1950   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1951      (cache_line_size > ContendedPaddingWidth))
1952      ContendedPaddingWidth = cache_line_size;
1953 
1954   // This machine allows unaligned memory accesses
1955   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1956     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1957   }
1958 
1959 #ifndef PRODUCT
1960   if (log_is_enabled(Info, os, cpu)) {
1961     LogStream ls(Log(os, cpu)::info());
1962     outputStream* log = &ls;
1963     log->print_cr("Logical CPUs per core: %u",
1964                   logical_processors_per_package());
1965     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1966     log->print("UseSSE=%d", UseSSE);
1967     if (UseAVX > 0) {
1968       log->print("  UseAVX=%d", UseAVX);
1969     }
1970     if (UseAES) {
1971       log->print("  UseAES=1");
1972     }
1973 #ifdef COMPILER2
1974     if (MaxVectorSize > 0) {
1975       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1976     }
1977 #endif
1978     log->cr();
1979     log->print("Allocation");
1980     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1981       log->print_cr(": no prefetching");
1982     } else {
1983       log->print(" prefetching: ");
1984       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1985         log->print("PREFETCHW");
1986       } else if (UseSSE >= 1) {
1987         if (AllocatePrefetchInstr == 0) {
1988           log->print("PREFETCHNTA");
1989         } else if (AllocatePrefetchInstr == 1) {
1990           log->print("PREFETCHT0");
1991         } else if (AllocatePrefetchInstr == 2) {
1992           log->print("PREFETCHT2");
1993         } else if (AllocatePrefetchInstr == 3) {
1994           log->print("PREFETCHW");
1995         }
1996       }
1997       if (AllocatePrefetchLines > 1) {
1998         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1999       } else {
2000         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
2001       }
2002     }
2003 
2004     if (PrefetchCopyIntervalInBytes > 0) {
2005       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
2006     }
2007     if (PrefetchScanIntervalInBytes > 0) {
2008       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
2009     }
2010     if (ContendedPaddingWidth > 0) {
2011       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
2012     }
2013   }
2014 #endif // !PRODUCT
2015   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
2016       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
2017   }
2018   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
2019       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
2020   }
2021 }
2022 
2023 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2024   VirtualizationType vrt = VM_Version::get_detected_virtualization();
2025   if (vrt == XenHVM) {
2026     st->print_cr("Xen hardware-assisted virtualization detected");
2027   } else if (vrt == KVM) {
2028     st->print_cr("KVM virtualization detected");
2029   } else if (vrt == VMWare) {
2030     st->print_cr("VMWare virtualization detected");
2031     VirtualizationSupport::print_virtualization_info(st);
2032   } else if (vrt == HyperV) {
2033     st->print_cr("Hyper-V virtualization detected");
2034   } else if (vrt == HyperVRole) {
2035     st->print_cr("Hyper-V role detected");
2036   }
2037 }
2038 
2039 bool VM_Version::compute_has_intel_jcc_erratum() {
2040   if (!is_intel_family_core()) {
2041     // Only Intel CPUs are affected.
2042     return false;
2043   }
2044   // The following table of affected CPUs is based on the following document released by Intel:
2045   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
2046   switch (_model) {
2047   case 0x8E:
2048     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2049     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
2050     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
2051     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
2052     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
2053     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2054     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2055     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
2056     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2057     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
2058   case 0x4E:
2059     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
2060     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
2061     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
2062     return _stepping == 0x3;
2063   case 0x55:
2064     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2065     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2066     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2067     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2068     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2069     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2070     return _stepping == 0x4 || _stepping == 0x7;
2071   case 0x5E:
2072     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2073     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2074     return _stepping == 0x3;
2075   case 0x9E:
2076     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2077     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2078     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2079     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2080     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2081     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2082     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2083     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2084     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2085     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2086     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2087     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2088     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2089     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2090     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2091   case 0xA5:
2092     // Not in Intel documentation.
2093     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2094     return true;
2095   case 0xA6:
2096     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2097     return _stepping == 0x0;
2098   case 0xAE:
2099     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2100     return _stepping == 0xA;
2101   default:
2102     // If we are running on another intel machine not recognized in the table, we are okay.
2103     return false;
2104   }
2105 }
2106 
2107 // On Xen, the cpuid instruction returns
2108 //  eax / registers[0]: Version of Xen
2109 //  ebx / registers[1]: chars 'XenV'
2110 //  ecx / registers[2]: chars 'MMXe'
2111 //  edx / registers[3]: chars 'nVMM'
2112 //
2113 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2114 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2115 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2116 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2117 //
2118 // more information :
2119 // https://kb.vmware.com/s/article/1009458
2120 //
2121 void VM_Version::check_virtualizations() {
2122   uint32_t registers[4] = {0};
2123   char signature[13] = {0};
2124 
2125   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2126   // from 0x40000000 until 0x40010000.
2127   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2128   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2129     detect_virt_stub(leaf, registers);
2130     memcpy(signature, &registers[1], 12);
2131 
2132     if (strncmp("VMwareVMware", signature, 12) == 0) {
2133       Abstract_VM_Version::_detected_virtualization = VMWare;
2134       // check for extended metrics from guestlib
2135       VirtualizationSupport::initialize();
2136     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2137       Abstract_VM_Version::_detected_virtualization = HyperV;
2138 #ifdef _WINDOWS
2139       // CPUID leaf 0x40000007 is available to the root partition only.
2140       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2141       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2142       detect_virt_stub(0x40000007, registers);
2143       if ((registers[0] != 0x0) ||
2144           (registers[1] != 0x0) ||
2145           (registers[2] != 0x0) ||
2146           (registers[3] != 0x0)) {
2147         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2148       }
2149 #endif
2150     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2151       Abstract_VM_Version::_detected_virtualization = KVM;
2152     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2153       Abstract_VM_Version::_detected_virtualization = XenHVM;
2154     }
2155   }
2156 }
2157 
2158 #ifdef COMPILER2
2159 // Determine if it's running on Cascade Lake using default options.
2160 bool VM_Version::is_default_intel_cascade_lake() {
2161   return FLAG_IS_DEFAULT(UseAVX) &&
2162          FLAG_IS_DEFAULT(MaxVectorSize) &&
2163          UseAVX > 2 &&
2164          is_intel_cascade_lake();
2165 }
2166 #endif
2167 
2168 bool VM_Version::is_intel_cascade_lake() {
2169   return is_intel_skylake() && _stepping >= 5;
2170 }
2171 
2172 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2173 // for implementing the array copy and clear operations.
2174 // The Intel platforms that supports the serialize instruction
2175 // has improved implementation of 64-byte load/stores and so the default
2176 // threshold is set to 0 for these platforms.
2177 int VM_Version::avx3_threshold() {
2178   return (is_intel_family_core() &&
2179           supports_serialize() &&
2180           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2181 }
2182 
2183 #if defined(_LP64)
2184 void VM_Version::clear_apx_test_state() {
2185   clear_apx_test_state_stub();
2186 }
2187 #endif
2188 
2189 static bool _vm_version_initialized = false;
2190 
2191 void VM_Version::initialize() {
2192   ResourceMark rm;
2193   // Making this stub must be FIRST use of assembler
2194   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2195   if (stub_blob == nullptr) {
2196     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2197   }
2198   CodeBuffer c(stub_blob);
2199   VM_Version_StubGenerator g(&c);
2200 
2201   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2202                                      g.generate_get_cpu_info());
2203   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2204                                      g.generate_detect_virt());
2205 
2206 #if defined(_LP64)
2207   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2208                                      g.clear_apx_test_state());
2209 #endif
2210   get_processor_features();
2211 
2212   LP64_ONLY(Assembler::precompute_instructions();)
2213 
2214   if (VM_Version::supports_hv()) { // Supports hypervisor
2215     check_virtualizations();
2216   }
2217   _vm_version_initialized = true;
2218 }
2219 
2220 typedef enum {
2221    CPU_FAMILY_8086_8088  = 0,
2222    CPU_FAMILY_INTEL_286  = 2,
2223    CPU_FAMILY_INTEL_386  = 3,
2224    CPU_FAMILY_INTEL_486  = 4,
2225    CPU_FAMILY_PENTIUM    = 5,
2226    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2227    CPU_FAMILY_PENTIUM_4  = 0xF
2228 } FamilyFlag;
2229 
2230 typedef enum {
2231   RDTSCP_FLAG  = 0x08000000, // bit 27
2232   INTEL64_FLAG = 0x20000000  // bit 29
2233 } _featureExtendedEdxFlag;
2234 
2235 typedef enum {
2236    FPU_FLAG     = 0x00000001,
2237    VME_FLAG     = 0x00000002,
2238    DE_FLAG      = 0x00000004,
2239    PSE_FLAG     = 0x00000008,
2240    TSC_FLAG     = 0x00000010,
2241    MSR_FLAG     = 0x00000020,
2242    PAE_FLAG     = 0x00000040,
2243    MCE_FLAG     = 0x00000080,
2244    CX8_FLAG     = 0x00000100,
2245    APIC_FLAG    = 0x00000200,
2246    SEP_FLAG     = 0x00000800,
2247    MTRR_FLAG    = 0x00001000,
2248    PGE_FLAG     = 0x00002000,
2249    MCA_FLAG     = 0x00004000,
2250    CMOV_FLAG    = 0x00008000,
2251    PAT_FLAG     = 0x00010000,
2252    PSE36_FLAG   = 0x00020000,
2253    PSNUM_FLAG   = 0x00040000,
2254    CLFLUSH_FLAG = 0x00080000,
2255    DTS_FLAG     = 0x00200000,
2256    ACPI_FLAG    = 0x00400000,
2257    MMX_FLAG     = 0x00800000,
2258    FXSR_FLAG    = 0x01000000,
2259    SSE_FLAG     = 0x02000000,
2260    SSE2_FLAG    = 0x04000000,
2261    SS_FLAG      = 0x08000000,
2262    HTT_FLAG     = 0x10000000,
2263    TM_FLAG      = 0x20000000
2264 } FeatureEdxFlag;
2265 
2266 static BufferBlob* cpuid_brand_string_stub_blob;
2267 static const int   cpuid_brand_string_stub_size = 550;
2268 
2269 extern "C" {
2270   typedef void (*getCPUIDBrandString_stub_t)(void*);
2271 }
2272 
2273 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2274 
2275 // VM_Version statics
2276 enum {
2277   ExtendedFamilyIdLength_INTEL = 16,
2278   ExtendedFamilyIdLength_AMD   = 24
2279 };
2280 
2281 const size_t VENDOR_LENGTH = 13;
2282 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2283 static char* _cpu_brand_string = nullptr;
2284 static int64_t _max_qualified_cpu_frequency = 0;
2285 
2286 static int _no_of_threads = 0;
2287 static int _no_of_cores = 0;
2288 
2289 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2290   "8086/8088",
2291   "",
2292   "286",
2293   "386",
2294   "486",
2295   "Pentium",
2296   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2297   "",
2298   "",
2299   "",
2300   "",
2301   "",
2302   "",
2303   "",
2304   "",
2305   "Pentium 4"
2306 };
2307 
2308 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2309   "",
2310   "",
2311   "",
2312   "",
2313   "5x86",
2314   "K5/K6",
2315   "Athlon/AthlonXP",
2316   "",
2317   "",
2318   "",
2319   "",
2320   "",
2321   "",
2322   "",
2323   "",
2324   "Opteron/Athlon64",
2325   "Opteron QC/Phenom",  // Barcelona et.al.
2326   "",
2327   "",
2328   "",
2329   "",
2330   "",
2331   "",
2332   "Zen"
2333 };
2334 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2335 // September 2013, Vol 3C Table 35-1
2336 const char* const _model_id_pentium_pro[] = {
2337   "",
2338   "Pentium Pro",
2339   "",
2340   "Pentium II model 3",
2341   "",
2342   "Pentium II model 5/Xeon/Celeron",
2343   "Celeron",
2344   "Pentium III/Pentium III Xeon",
2345   "Pentium III/Pentium III Xeon",
2346   "Pentium M model 9",    // Yonah
2347   "Pentium III, model A",
2348   "Pentium III, model B",
2349   "",
2350   "Pentium M model D",    // Dothan
2351   "",
2352   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2353   "",
2354   "",
2355   "",
2356   "",
2357   "",
2358   "",
2359   "Celeron",              // 0x16 Celeron 65nm
2360   "Core 2",               // 0x17 Penryn / Harpertown
2361   "",
2362   "",
2363   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2364   "Atom",                 // 0x1B Z5xx series Silverthorn
2365   "",
2366   "Core 2",               // 0x1D Dunnington (6-core)
2367   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2368   "",
2369   "",
2370   "",
2371   "",
2372   "",
2373   "",
2374   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2375   "",
2376   "",
2377   "",                     // 0x28
2378   "",
2379   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2380   "",
2381   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2382   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2383   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2384   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "",
2392   "",
2393   "",
2394   "",
2395   "Ivy Bridge",           // 0x3a
2396   "",
2397   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2398   "",                     // 0x3d "Next Generation Intel Core Processor"
2399   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2400   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2401   "",
2402   "",
2403   "",
2404   "",
2405   "",
2406   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2407   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2408   nullptr
2409 };
2410 
2411 /* Brand ID is for back compatibility
2412  * Newer CPUs uses the extended brand string */
2413 const char* const _brand_id[] = {
2414   "",
2415   "Celeron processor",
2416   "Pentium III processor",
2417   "Intel Pentium III Xeon processor",
2418   "",
2419   "",
2420   "",
2421   "",
2422   "Intel Pentium 4 processor",
2423   nullptr
2424 };
2425 
2426 
2427 const char* const _feature_edx_id[] = {
2428   "On-Chip FPU",
2429   "Virtual Mode Extensions",
2430   "Debugging Extensions",
2431   "Page Size Extensions",
2432   "Time Stamp Counter",
2433   "Model Specific Registers",
2434   "Physical Address Extension",
2435   "Machine Check Exceptions",
2436   "CMPXCHG8B Instruction",
2437   "On-Chip APIC",
2438   "",
2439   "Fast System Call",
2440   "Memory Type Range Registers",
2441   "Page Global Enable",
2442   "Machine Check Architecture",
2443   "Conditional Mov Instruction",
2444   "Page Attribute Table",
2445   "36-bit Page Size Extension",
2446   "Processor Serial Number",
2447   "CLFLUSH Instruction",
2448   "",
2449   "Debug Trace Store feature",
2450   "ACPI registers in MSR space",
2451   "Intel Architecture MMX Technology",
2452   "Fast Float Point Save and Restore",
2453   "Streaming SIMD extensions",
2454   "Streaming SIMD extensions 2",
2455   "Self-Snoop",
2456   "Hyper Threading",
2457   "Thermal Monitor",
2458   "",
2459   "Pending Break Enable"
2460 };
2461 
2462 const char* const _feature_extended_edx_id[] = {
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "SYSCALL/SYSRET",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   "Execute Disable Bit",
2484   "",
2485   "",
2486   "",
2487   "",
2488   "",
2489   "",
2490   "RDTSCP",
2491   "",
2492   "Intel 64 Architecture",
2493   "",
2494   ""
2495 };
2496 
2497 const char* const _feature_ecx_id[] = {
2498   "Streaming SIMD Extensions 3",
2499   "PCLMULQDQ",
2500   "64-bit DS Area",
2501   "MONITOR/MWAIT instructions",
2502   "CPL Qualified Debug Store",
2503   "Virtual Machine Extensions",
2504   "Safer Mode Extensions",
2505   "Enhanced Intel SpeedStep technology",
2506   "Thermal Monitor 2",
2507   "Supplemental Streaming SIMD Extensions 3",
2508   "L1 Context ID",
2509   "",
2510   "Fused Multiply-Add",
2511   "CMPXCHG16B",
2512   "xTPR Update Control",
2513   "Perfmon and Debug Capability",
2514   "",
2515   "Process-context identifiers",
2516   "Direct Cache Access",
2517   "Streaming SIMD extensions 4.1",
2518   "Streaming SIMD extensions 4.2",
2519   "x2APIC",
2520   "MOVBE",
2521   "Popcount instruction",
2522   "TSC-Deadline",
2523   "AESNI",
2524   "XSAVE",
2525   "OSXSAVE",
2526   "AVX",
2527   "F16C",
2528   "RDRAND",
2529   ""
2530 };
2531 
2532 const char* const _feature_extended_ecx_id[] = {
2533   "LAHF/SAHF instruction support",
2534   "Core multi-processor legacy mode",
2535   "",
2536   "",
2537   "",
2538   "Advanced Bit Manipulations: LZCNT",
2539   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2540   "Misaligned SSE mode",
2541   "",
2542   "",
2543   "",
2544   "",
2545   "",
2546   "",
2547   "",
2548   "",
2549   "",
2550   "",
2551   "",
2552   "",
2553   "",
2554   "",
2555   "",
2556   "",
2557   "",
2558   "",
2559   "",
2560   "",
2561   "",
2562   "",
2563   "",
2564   ""
2565 };
2566 
2567 void VM_Version::initialize_tsc(void) {
2568   ResourceMark rm;
2569 
2570   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2571   if (cpuid_brand_string_stub_blob == nullptr) {
2572     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2573   }
2574   CodeBuffer c(cpuid_brand_string_stub_blob);
2575   VM_Version_StubGenerator g(&c);
2576   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2577                                    g.generate_getCPUIDBrandString());
2578 }
2579 
2580 const char* VM_Version::cpu_model_description(void) {
2581   uint32_t cpu_family = extended_cpu_family();
2582   uint32_t cpu_model = extended_cpu_model();
2583   const char* model = nullptr;
2584 
2585   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2586     for (uint32_t i = 0; i <= cpu_model; i++) {
2587       model = _model_id_pentium_pro[i];
2588       if (model == nullptr) {
2589         break;
2590       }
2591     }
2592   }
2593   return model;
2594 }
2595 
2596 const char* VM_Version::cpu_brand_string(void) {
2597   if (_cpu_brand_string == nullptr) {
2598     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2599     if (nullptr == _cpu_brand_string) {
2600       return nullptr;
2601     }
2602     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2603     if (ret_val != OS_OK) {
2604       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2605       _cpu_brand_string = nullptr;
2606     }
2607   }
2608   return _cpu_brand_string;
2609 }
2610 
2611 const char* VM_Version::cpu_brand(void) {
2612   const char*  brand  = nullptr;
2613 
2614   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2615     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2616     brand = _brand_id[0];
2617     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2618       brand = _brand_id[i];
2619     }
2620   }
2621   return brand;
2622 }
2623 
2624 bool VM_Version::cpu_is_em64t(void) {
2625   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2626 }
2627 
2628 bool VM_Version::is_netburst(void) {
2629   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2630 }
2631 
2632 bool VM_Version::supports_tscinv_ext(void) {
2633   if (!supports_tscinv_bit()) {
2634     return false;
2635   }
2636 
2637   if (is_intel()) {
2638     return true;
2639   }
2640 
2641   if (is_amd()) {
2642     return !is_amd_Barcelona();
2643   }
2644 
2645   if (is_hygon()) {
2646     return true;
2647   }
2648 
2649   return false;
2650 }
2651 
2652 void VM_Version::resolve_cpu_information_details(void) {
2653 
2654   // in future we want to base this information on proper cpu
2655   // and cache topology enumeration such as:
2656   // Intel 64 Architecture Processor Topology Enumeration
2657   // which supports system cpu and cache topology enumeration
2658   // either using 2xAPICIDs or initial APICIDs
2659 
2660   // currently only rough cpu information estimates
2661   // which will not necessarily reflect the exact configuration of the system
2662 
2663   // this is the number of logical hardware threads
2664   // visible to the operating system
2665   _no_of_threads = os::processor_count();
2666 
2667   // find out number of threads per cpu package
2668   int threads_per_package = threads_per_core() * cores_per_cpu();
2669 
2670   // use amount of threads visible to the process in order to guess number of sockets
2671   _no_of_sockets = _no_of_threads / threads_per_package;
2672 
2673   // process might only see a subset of the total number of threads
2674   // from a single processor package. Virtualization/resource management for example.
2675   // If so then just write a hard 1 as num of pkgs.
2676   if (0 == _no_of_sockets) {
2677     _no_of_sockets = 1;
2678   }
2679 
2680   // estimate the number of cores
2681   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2682 }
2683 
2684 
2685 const char* VM_Version::cpu_family_description(void) {
2686   int cpu_family_id = extended_cpu_family();
2687   if (is_amd()) {
2688     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2689       return _family_id_amd[cpu_family_id];
2690     }
2691   }
2692   if (is_intel()) {
2693     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2694       return cpu_model_description();
2695     }
2696     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2697       return _family_id_intel[cpu_family_id];
2698     }
2699   }
2700   if (is_hygon()) {
2701     return "Dhyana";
2702   }
2703   return "Unknown x86";
2704 }
2705 
2706 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2707   assert(buf != nullptr, "buffer is null!");
2708   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2709 
2710   const char* cpu_type = nullptr;
2711   const char* x64 = nullptr;
2712 
2713   if (is_intel()) {
2714     cpu_type = "Intel";
2715     x64 = cpu_is_em64t() ? " Intel64" : "";
2716   } else if (is_amd()) {
2717     cpu_type = "AMD";
2718     x64 = cpu_is_em64t() ? " AMD64" : "";
2719   } else if (is_hygon()) {
2720     cpu_type = "Hygon";
2721     x64 = cpu_is_em64t() ? " AMD64" : "";
2722   } else {
2723     cpu_type = "Unknown x86";
2724     x64 = cpu_is_em64t() ? " x86_64" : "";
2725   }
2726 
2727   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2728     cpu_type,
2729     cpu_family_description(),
2730     supports_ht() ? " (HT)" : "",
2731     supports_sse3() ? " SSE3" : "",
2732     supports_ssse3() ? " SSSE3" : "",
2733     supports_sse4_1() ? " SSE4.1" : "",
2734     supports_sse4_2() ? " SSE4.2" : "",
2735     supports_sse4a() ? " SSE4A" : "",
2736     is_netburst() ? " Netburst" : "",
2737     is_intel_family_core() ? " Core" : "",
2738     x64);
2739 
2740   return OS_OK;
2741 }
2742 
2743 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2744   assert(buf != nullptr, "buffer is null!");
2745   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2746   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2747 
2748   // invoke newly generated asm code to fetch CPU Brand String
2749   getCPUIDBrandString_stub(&_cpuid_info);
2750 
2751   // fetch results into buffer
2752   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2753   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2754   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2755   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2756   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2757   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2758   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2759   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2760   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2761   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2762   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2763   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2764 
2765   return OS_OK;
2766 }
2767 
2768 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2769   guarantee(buf != nullptr, "buffer is null!");
2770   guarantee(buf_len > 0, "buffer len not enough!");
2771 
2772   unsigned int flag = 0;
2773   unsigned int fi = 0;
2774   size_t       written = 0;
2775   const char*  prefix = "";
2776 
2777 #define WRITE_TO_BUF(string)                                                          \
2778   {                                                                                   \
2779     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2780     if (res < 0) {                                                                    \
2781       return buf_len - 1;                                                             \
2782     }                                                                                 \
2783     written += res;                                                                   \
2784     if (prefix[0] == '\0') {                                                          \
2785       prefix = ", ";                                                                  \
2786     }                                                                                 \
2787   }
2788 
2789   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2790     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2791       continue; /* no hyperthreading */
2792     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2793       continue; /* no fast system call */
2794     }
2795     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2796       WRITE_TO_BUF(_feature_edx_id[fi]);
2797     }
2798   }
2799 
2800   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2801     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2802       WRITE_TO_BUF(_feature_ecx_id[fi]);
2803     }
2804   }
2805 
2806   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2807     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2808       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2809     }
2810   }
2811 
2812   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2813     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2814       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2815     }
2816   }
2817 
2818   if (supports_tscinv_bit()) {
2819       WRITE_TO_BUF("Invariant TSC");
2820   }
2821 
2822   return written;
2823 }
2824 
2825 /**
2826  * Write a detailed description of the cpu to a given buffer, including
2827  * feature set.
2828  */
2829 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2830   assert(buf != nullptr, "buffer is null!");
2831   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2832 
2833   static const char* unknown = "<unknown>";
2834   char               vendor_id[VENDOR_LENGTH];
2835   const char*        family = nullptr;
2836   const char*        model = nullptr;
2837   const char*        brand = nullptr;
2838   int                outputLen = 0;
2839 
2840   family = cpu_family_description();
2841   if (family == nullptr) {
2842     family = unknown;
2843   }
2844 
2845   model = cpu_model_description();
2846   if (model == nullptr) {
2847     model = unknown;
2848   }
2849 
2850   brand = cpu_brand_string();
2851 
2852   if (brand == nullptr) {
2853     brand = cpu_brand();
2854     if (brand == nullptr) {
2855       brand = unknown;
2856     }
2857   }
2858 
2859   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2860   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2861   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2862   vendor_id[VENDOR_LENGTH-1] = '\0';
2863 
2864   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2865     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2866     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2867     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2868     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2869     "Supports: ",
2870     brand,
2871     vendor_id,
2872     family,
2873     extended_cpu_family(),
2874     model,
2875     extended_cpu_model(),
2876     cpu_stepping(),
2877     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2878     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2879     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2880     _cpuid_info.std_cpuid1_eax.value,
2881     _cpuid_info.std_cpuid1_ebx.value,
2882     _cpuid_info.std_cpuid1_ecx.value,
2883     _cpuid_info.std_cpuid1_edx.value,
2884     _cpuid_info.ext_cpuid1_eax,
2885     _cpuid_info.ext_cpuid1_ebx,
2886     _cpuid_info.ext_cpuid1_ecx,
2887     _cpuid_info.ext_cpuid1_edx);
2888 
2889   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2890     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2891     return OS_ERR;
2892   }
2893 
2894   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2895 
2896   return OS_OK;
2897 }
2898 
2899 
2900 // Fill in Abstract_VM_Version statics
2901 void VM_Version::initialize_cpu_information() {
2902   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2903   assert(!_initialized, "shouldn't be initialized yet");
2904   resolve_cpu_information_details();
2905 
2906   // initialize cpu_name and cpu_desc
2907   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2908   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2909   _initialized = true;
2910 }
2911 
2912 /**
2913  *  For information about extracting the frequency from the cpu brand string, please see:
2914  *
2915  *    Intel Processor Identification and the CPUID Instruction
2916  *    Application Note 485
2917  *    May 2012
2918  *
2919  * The return value is the frequency in Hz.
2920  */
2921 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2922   const char* const brand_string = cpu_brand_string();
2923   if (brand_string == nullptr) {
2924     return 0;
2925   }
2926   const int64_t MEGA = 1000000;
2927   int64_t multiplier = 0;
2928   int64_t frequency = 0;
2929   uint8_t idx = 0;
2930   // The brand string buffer is at most 48 bytes.
2931   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2932   for (; idx < 48-2; ++idx) {
2933     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2934     // Search brand string for "yHz" where y is M, G, or T.
2935     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2936       if (brand_string[idx] == 'M') {
2937         multiplier = MEGA;
2938       } else if (brand_string[idx] == 'G') {
2939         multiplier = MEGA * 1000;
2940       } else if (brand_string[idx] == 'T') {
2941         multiplier = MEGA * MEGA;
2942       }
2943       break;
2944     }
2945   }
2946   if (multiplier > 0) {
2947     // Compute frequency (in Hz) from brand string.
2948     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2949       frequency =  (brand_string[idx-4] - '0') * multiplier;
2950       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2951       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2952     } else { // format is "xxxx"
2953       frequency =  (brand_string[idx-4] - '0') * 1000;
2954       frequency += (brand_string[idx-3] - '0') * 100;
2955       frequency += (brand_string[idx-2] - '0') * 10;
2956       frequency += (brand_string[idx-1] - '0');
2957       frequency *= multiplier;
2958     }
2959   }
2960   return frequency;
2961 }
2962 
2963 
2964 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2965   if (_max_qualified_cpu_frequency == 0) {
2966     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2967   }
2968   return _max_qualified_cpu_frequency;
2969 }
2970 
2971 uint64_t VM_Version::CpuidInfo::feature_flags() const {
2972   uint64_t result = 0;
2973   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2974     result |= CPU_CX8;
2975   if (std_cpuid1_edx.bits.cmov != 0)
2976     result |= CPU_CMOV;
2977   if (std_cpuid1_edx.bits.clflush != 0)
2978     result |= CPU_FLUSH;
2979 #ifdef _LP64
2980   // clflush should always be available on x86_64
2981   // if not we are in real trouble because we rely on it
2982   // to flush the code cache.
2983   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2984 #endif
2985   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2986       ext_cpuid1_edx.bits.fxsr != 0))
2987     result |= CPU_FXSR;
2988   // HT flag is set for multi-core processors also.
2989   if (threads_per_core() > 1)
2990     result |= CPU_HT;
2991   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2992       ext_cpuid1_edx.bits.mmx != 0))
2993     result |= CPU_MMX;
2994   if (std_cpuid1_edx.bits.sse != 0)
2995     result |= CPU_SSE;
2996   if (std_cpuid1_edx.bits.sse2 != 0)
2997     result |= CPU_SSE2;
2998   if (std_cpuid1_ecx.bits.sse3 != 0)
2999     result |= CPU_SSE3;
3000   if (std_cpuid1_ecx.bits.ssse3 != 0)
3001     result |= CPU_SSSE3;
3002   if (std_cpuid1_ecx.bits.sse4_1 != 0)
3003     result |= CPU_SSE4_1;
3004   if (std_cpuid1_ecx.bits.sse4_2 != 0)
3005     result |= CPU_SSE4_2;
3006   if (std_cpuid1_ecx.bits.popcnt != 0)
3007     result |= CPU_POPCNT;
3008   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
3009       xem_xcr0_eax.bits.apx_f != 0) {
3010     result |= CPU_APX_F;
3011   }
3012   if (std_cpuid1_ecx.bits.avx != 0 &&
3013       std_cpuid1_ecx.bits.osxsave != 0 &&
3014       xem_xcr0_eax.bits.sse != 0 &&
3015       xem_xcr0_eax.bits.ymm != 0) {
3016     result |= CPU_AVX;
3017     result |= CPU_VZEROUPPER;
3018     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
3019       result |= CPU_SHA512;
3020     if (std_cpuid1_ecx.bits.f16c != 0)
3021       result |= CPU_F16C;
3022     if (sef_cpuid7_ebx.bits.avx2 != 0) {
3023       result |= CPU_AVX2;
3024       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
3025         result |= CPU_AVX_IFMA;
3026     }
3027     if (sef_cpuid7_ecx.bits.gfni != 0)
3028         result |= CPU_GFNI;
3029     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
3030         xem_xcr0_eax.bits.opmask != 0 &&
3031         xem_xcr0_eax.bits.zmm512 != 0 &&
3032         xem_xcr0_eax.bits.zmm32 != 0) {
3033       result |= CPU_AVX512F;
3034       if (sef_cpuid7_ebx.bits.avx512cd != 0)
3035         result |= CPU_AVX512CD;
3036       if (sef_cpuid7_ebx.bits.avx512dq != 0)
3037         result |= CPU_AVX512DQ;
3038       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
3039         result |= CPU_AVX512_IFMA;
3040       if (sef_cpuid7_ebx.bits.avx512pf != 0)
3041         result |= CPU_AVX512PF;
3042       if (sef_cpuid7_ebx.bits.avx512er != 0)
3043         result |= CPU_AVX512ER;
3044       if (sef_cpuid7_ebx.bits.avx512bw != 0)
3045         result |= CPU_AVX512BW;
3046       if (sef_cpuid7_ebx.bits.avx512vl != 0)
3047         result |= CPU_AVX512VL;
3048       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
3049         result |= CPU_AVX512_VPOPCNTDQ;
3050       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
3051         result |= CPU_AVX512_VPCLMULQDQ;
3052       if (sef_cpuid7_ecx.bits.vaes != 0)
3053         result |= CPU_AVX512_VAES;
3054       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
3055         result |= CPU_AVX512_VNNI;
3056       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
3057         result |= CPU_AVX512_BITALG;
3058       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
3059         result |= CPU_AVX512_VBMI;
3060       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
3061         result |= CPU_AVX512_VBMI2;
3062     }
3063   }
3064   if (std_cpuid1_ecx.bits.hv != 0)
3065     result |= CPU_HV;
3066   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3067     result |= CPU_BMI1;
3068   if (std_cpuid1_edx.bits.tsc != 0)
3069     result |= CPU_TSC;
3070   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3071     result |= CPU_TSCINV_BIT;
3072   if (std_cpuid1_ecx.bits.aes != 0)
3073     result |= CPU_AES;
3074   if (sef_cpuid7_ebx.bits.erms != 0)
3075     result |= CPU_ERMS;
3076   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3077     result |= CPU_FSRM;
3078   if (std_cpuid1_ecx.bits.clmul != 0)
3079     result |= CPU_CLMUL;
3080   if (sef_cpuid7_ebx.bits.rtm != 0)
3081     result |= CPU_RTM;
3082   if (sef_cpuid7_ebx.bits.adx != 0)
3083      result |= CPU_ADX;
3084   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3085     result |= CPU_BMI2;
3086   if (sef_cpuid7_ebx.bits.sha != 0)
3087     result |= CPU_SHA;
3088   if (std_cpuid1_ecx.bits.fma != 0)
3089     result |= CPU_FMA;
3090   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3091     result |= CPU_FLUSHOPT;
3092   if (ext_cpuid1_edx.bits.rdtscp != 0)
3093     result |= CPU_RDTSCP;
3094   if (sef_cpuid7_ecx.bits.rdpid != 0)
3095     result |= CPU_RDPID;
3096 
3097   // AMD|Hygon features.
3098   if (is_amd_family()) {
3099     if ((ext_cpuid1_edx.bits.tdnow != 0) ||
3100         (ext_cpuid1_ecx.bits.prefetchw != 0))
3101       result |= CPU_3DNOW_PREFETCH;
3102     if (ext_cpuid1_ecx.bits.lzcnt != 0)
3103       result |= CPU_LZCNT;
3104     if (ext_cpuid1_ecx.bits.sse4a != 0)
3105       result |= CPU_SSE4A;
3106   }
3107 
3108   // Intel features.
3109   if (is_intel()) {
3110     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3111       result |= CPU_LZCNT;
3112     }
3113     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3114       result |= CPU_3DNOW_PREFETCH;
3115     }
3116     if (sef_cpuid7_ebx.bits.clwb != 0) {
3117       result |= CPU_CLWB;
3118     }
3119     if (sef_cpuid7_edx.bits.serialize != 0)
3120       result |= CPU_SERIALIZE;
3121   }
3122 
3123   // ZX features.
3124   if (is_zx()) {
3125     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3126       result |= CPU_LZCNT;
3127     }
3128     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3129       result |= CPU_3DNOW_PREFETCH;
3130     }
3131   }
3132 
3133   // Protection key features.
3134   if (sef_cpuid7_ecx.bits.pku != 0) {
3135     result |= CPU_PKU;
3136   }
3137   if (sef_cpuid7_ecx.bits.ospke != 0) {
3138     result |= CPU_OSPKE;
3139   }
3140 
3141   // Control flow enforcement (CET) features.
3142   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3143     result |= CPU_CET_SS;
3144   }
3145   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3146     result |= CPU_CET_IBT;
3147   }
3148 
3149   // Composite features.
3150   if (supports_tscinv_bit() &&
3151       ((is_amd_family() && !is_amd_Barcelona()) ||
3152        is_intel_tsc_synched_at_init())) {
3153     result |= CPU_TSCINV;
3154   }
3155 
3156   return result;
3157 }
3158 
3159 bool VM_Version::os_supports_avx_vectors() {
3160   bool retVal = false;
3161   int nreg = 2 LP64_ONLY(+2);
3162   if (supports_evex()) {
3163     // Verify that OS save/restore all bits of EVEX registers
3164     // during signal processing.
3165     retVal = true;
3166     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3167       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3168         retVal = false;
3169         break;
3170       }
3171     }
3172   } else if (supports_avx()) {
3173     // Verify that OS save/restore all bits of AVX registers
3174     // during signal processing.
3175     retVal = true;
3176     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3177       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3178         retVal = false;
3179         break;
3180       }
3181     }
3182     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3183     if (retVal == false) {
3184       // Verify that OS save/restore all bits of EVEX registers
3185       // during signal processing.
3186       retVal = true;
3187       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3188         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3189           retVal = false;
3190           break;
3191         }
3192       }
3193     }
3194   }
3195   return retVal;
3196 }
3197 
3198 bool VM_Version::os_supports_apx_egprs() {
3199   if (!supports_apx_f()) {
3200     return false;
3201   }
3202   // Enable APX support for product builds after
3203   // completion of planned features listed in JDK-8329030.
3204 #if !defined(PRODUCT)
3205   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3206       _cpuid_info.apx_save[1] != egpr_test_value()) {
3207     return false;
3208   }
3209   return true;
3210 #else
3211   return false;
3212 #endif
3213 }
3214 
3215 uint VM_Version::cores_per_cpu() {
3216   uint result = 1;
3217   if (is_intel()) {
3218     bool supports_topology = supports_processor_topology();
3219     if (supports_topology) {
3220       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3221                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3222     }
3223     if (!supports_topology || result == 0) {
3224       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3225     }
3226   } else if (is_amd_family()) {
3227     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3228   } else if (is_zx()) {
3229     bool supports_topology = supports_processor_topology();
3230     if (supports_topology) {
3231       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3232                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3233     }
3234     if (!supports_topology || result == 0) {
3235       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3236     }
3237   }
3238   return result;
3239 }
3240 
3241 uint VM_Version::threads_per_core() {
3242   uint result = 1;
3243   if (is_intel() && supports_processor_topology()) {
3244     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3245   } else if (is_zx() && supports_processor_topology()) {
3246     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3247   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3248     if (cpu_family() >= 0x17) {
3249       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3250     } else {
3251       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3252                  cores_per_cpu();
3253     }
3254   }
3255   return (result == 0 ? 1 : result);
3256 }
3257 
3258 uint VM_Version::L1_line_size() {
3259   uint result = 0;
3260   if (is_intel()) {
3261     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3262   } else if (is_amd_family()) {
3263     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3264   } else if (is_zx()) {
3265     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3266   }
3267   if (result < 32) // not defined ?
3268     result = 32;   // 32 bytes by default on x86 and other x64
3269   return result;
3270 }
3271 
3272 bool VM_Version::is_intel_tsc_synched_at_init() {
3273   if (is_intel_family_core()) {
3274     uint32_t ext_model = extended_cpu_model();
3275     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3276         ext_model == CPU_MODEL_WESTMERE_EP    ||
3277         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3278         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3279       // <= 2-socket invariant tsc support. EX versions are usually used
3280       // in > 2-socket systems and likely don't synchronize tscs at
3281       // initialization.
3282       // Code that uses tsc values must be prepared for them to arbitrarily
3283       // jump forward or backward.
3284       return true;
3285     }
3286   }
3287   return false;
3288 }
3289 
3290 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3291   // Hardware prefetching (distance/size in bytes):
3292   // Pentium 3 -  64 /  32
3293   // Pentium 4 - 256 / 128
3294   // Athlon    -  64 /  32 ????
3295   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3296   // Core      - 128 /  64
3297   //
3298   // Software prefetching (distance in bytes / instruction with best score):
3299   // Pentium 3 - 128 / prefetchnta
3300   // Pentium 4 - 512 / prefetchnta
3301   // Athlon    - 128 / prefetchnta
3302   // Opteron   - 256 / prefetchnta
3303   // Core      - 256 / prefetchnta
3304   // It will be used only when AllocatePrefetchStyle > 0
3305 
3306   if (is_amd_family()) { // AMD | Hygon
3307     if (supports_sse2()) {
3308       return 256; // Opteron
3309     } else {
3310       return 128; // Athlon
3311     }
3312   } else { // Intel
3313     if (supports_sse3() && cpu_family() == 6) {
3314       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3315         return 192;
3316       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3317 #ifdef _LP64
3318         return 384;
3319 #else
3320         return 320;
3321 #endif
3322       }
3323     }
3324     if (supports_sse2()) {
3325       if (cpu_family() == 6) {
3326         return 256; // Pentium M, Core, Core2
3327       } else {
3328         return 512; // Pentium 4
3329       }
3330     } else {
3331       return 128; // Pentium 3 (and all other old CPUs)
3332     }
3333   }
3334 }
3335 
3336 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3337   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3338   switch (id) {
3339   case vmIntrinsics::_floatToFloat16:
3340   case vmIntrinsics::_float16ToFloat:
3341     if (!supports_float16()) {
3342       return false;
3343     }
3344     break;
3345   default:
3346     break;
3347   }
3348   return true;
3349 }