/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME) };
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64; if not, we are in real
  // trouble because we rely on it to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part of the
  // generation of the code cache flush routine. This happens under
  // Universe::init, before the processor features are set up.
  // Assembler::flush calls this routine to check that clflush is allowed.
  // So, we give the caller a free pass if Universe init is still in
  // progress.
  assert((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  return true;
}
#endif
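
// Not part of the upstream logic: an illustrative, host-side sketch of a
// cache-line flush, assuming an SSE2-capable toolchain that provides
// _mm_clflush/_mm_mfence in <emmintrin.h>. HotSpot itself emits the
// instruction directly via Assembler::clflush; this is only for orientation.
//
//   #include <emmintrin.h>
//   static void flush_line(const void* p) {
//     _mm_clflush(p);   // write back and invalidate the line containing p
//     _mm_mfence();     // order the flush before subsequent accesses
//   }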

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
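
// For orientation, the stubs below execute raw CPUID instructions and store
// the results into a CpuidInfo record. A minimal sketch of the same query
// from plain C++ (assuming a GCC/Clang toolchain that provides <cpuid.h>;
// the helper name is illustrative, not part of this file):
//
//   #include <cpuid.h>
//   static bool query_leaf_1(unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) {
//     // subleaf 0; returns false if the leaf is out of range on this CPU
//     return __get_cpuid_count(CPUID_STANDARD_FN_1, 0, eax, ebx, ecx, edx) != 0;
//   }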

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are caller-saved registers. Explicitly clearing r16 and r31
    // during signal handling guarantees that values observed after the
    // handler returns were re-instantiated by the operating system's state
    // restore, and are not simply leftovers that were never modified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);
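
    // The AC and ID probes above are the classic EFLAGS toggle test. In
    // C-like pseudocode (illustrative only; the real test must stay in
    // assembly because a compiler is free to clobber EFLAGS):
    //
    //   uint32_t old_flags = read_eflags();
    //   write_eflags(old_flags ^ HS_EFL_ID);           // try to flip the ID bit
    //   bool has_cpuid = (read_eflags() != old_flags); // stuck bit => pre-586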

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

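    // XGETBV with ECX=0 returns the 64-bit XCR0 value in EDX:EAX. A sketch
    // of how the two saved halves combine and which bits this stub tests
    // later (bit positions per the Intel SDM; illustrative only):
    //
    //   uint64_t xcr0 = ((uint64_t)edx << 32) | eax;
    //   bool sse_ymm_enabled = (xcr0 & 0x6)  == 0x6;    // bits 1-2: XMM | YMM state
    //   bool zmm_enabled     = (xcr0 & 0xE0) == 0xE0;   // bits 5-7: opmask | ZMM state
    //   bool apx_enabled     = (xcr0 & (1 << 19)) != 0; // bit 19: extended GPRs
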
    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.(EAX=7,ECX=1):EDX[21] for HW support
    // and XCR0 bit 19 for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
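
    // The APX probe above and the YMM/ZMM probe below share one pattern:
    // seed registers with a known test value, take a deliberate SEGV, and
    // store the registers once the signal handler returns so the caller can
    // verify that the OS actually restored the extended state. Schematically
    // (illustrative pseudocode, not part of the stub):
    //
    //   r16 = r31 = egpr_test_value();
    //   *(volatile int*)nullptr;     // SEGV; handler resumes at the cont addr
    //   apx_save[0] = r16;           // later compared against the test value
    //   apx_save[1] = r31;
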
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS saves/restores SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of YMM/ZMM
    // registers are not restored after signal processing.
    // Generate a SEGV here (reference through null)
    // and check the upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit 16: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  }
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
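
    // The 0x0FFF0FF0 mask keeps the extended-family, extended-model, family
    // and model fields of CPUID(1).EAX while dropping stepping and reserved
    // bits. The equivalent check in plain C (illustrative; the two
    // signatures are the Knights Landing / Knights Mill Xeon Phi parts):
    //
    //   uint32_t sig = std_cpuid1_eax & 0x0FFF0FF0;
    //   bool is_knl = (sig == 0x00050670);
    //   bool is_knm = (sig == 0x00080650);
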
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

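    // A sketch of how this stub is typically driven (assuming the
    // conventional hypervisor CPUID range that starts at leaf 0x40000000;
    // the local names are illustrative, not part of this file):
    //
    //   uint32_t regs[4];
    //   detect_virt_stub(0x40000000, regs);  // regs[1..3] = vendor signature
    //   char vendor[13];
    //   memcpy(vendor, &regs[1], 12);        // e.g. "KVMKVMKVM\0\0\0"
    //   vendor[12] = '\0';
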
    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  }


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  }
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why the Icache line size is hard coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
#endif

#ifdef _LP64
  // Assigning this field effectively enables Unsafe.writebackMemory(),
  // by initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero;
  // this is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero, thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif
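
  // Worked example (illustrative): CPUID reports clflush_size in 8-byte
  // quadwords, so the typical report of 8 publishes 8 * 8 = 64 bytes, the
  // common data cache line flush size on current x86_64 parts.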

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
      (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
  }

  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported);
  }

  if (!UseAPX) {
    _features &= ~CPU_APX_F;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
    _features &= ~CPU_SHA512;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE3 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function.  StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsics require AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // UseSSE is always at least 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX3
    max_vector_size = 64;
  }

#ifdef _LP64
  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
#else
  int min_vector_size = 0;
#endif

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 2 LP64_ONLY(+2);
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i * 8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

#ifdef _LP64
  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else
#endif
  if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx512ifma() && supports_avx512vlbw()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else
#endif
  if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

#ifdef _LP64
  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#else
  if (UseMultiplyToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
      warning("multiplyToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
  }
  if (UseMontgomeryMultiplyIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
  }
  if (UseMontgomerySquareIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
      warning("montgomerySquare intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
  }
  if (UseSquareToLenIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
      warning("squareToLen intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
  }
  if (UseMulAddIntrinsic) {
    if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
      warning("mulAdd intrinsic is not available in 32-bit VM");
    }
    FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
  }
#endif // _LP64
#endif // COMPILER2_OR_JVMCI

  // On newer CPUs, instructions which update the whole XMM register should
  // be used to prevent partial register stalls due to dependencies on the
  // high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).

1478   if (is_zx()) { // ZX cpus specific settings
1479     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1480       UseStoreImmI16 = false; // don't use it on ZX cpus
1481     }
1482     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1483       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1484         // Use it on all ZX cpus
1485         UseAddressNop = true;
1486       }
1487     }
1488     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1489       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1490     }
1491     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1492       if (supports_sse3()) {
1493         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1494       } else {
1495         UseXmmRegToRegMoveAll = false;
1496       }
1497     }
1498     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1499 #ifdef COMPILER2
1500       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus apply the following optimization:
        // don't align the beginning of a loop if there are enough
        // instructions left in the current fetch line (OptoLoopAlignment)
        // (NumberOfLoopInstrToAlign is defined in c2_globals.hpp) or if
        // the padding is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1509         MaxLoopPad = 11;
1510       }
1511 #endif // COMPILER2
1512       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1513         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1514       }
1515       if (supports_sse4_2()) { // new ZX cpus
1516         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1517           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1518         }
1519       }
1520       if (supports_sse4_2()) {
1521         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1522           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1523         }
1524       } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1526           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1527         }
1528         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1529       }
1530     }
1531 
1532     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1533       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1534     }
1535   }
1536 
1537   if (is_amd_family()) { // AMD cpus specific settings
1538     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1539       // Use it on new AMD cpus starting from Opteron.
1540       UseAddressNop = true;
1541     }
1542     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1543       // Use it on new AMD cpus starting from Opteron.
1544       UseNewLongLShift = true;
1545     }
1546     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1547       if (supports_sse4a()) {
1548         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1549       } else {
1550         UseXmmLoadAndClearUpper = false;
1551       }
1552     }
1553     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1554       if (supports_sse4a()) {
1555         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1556       } else {
1557         UseXmmRegToRegMoveAll = false;
1558       }
1559     }
1560     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1561       if (supports_sse4a()) {
1562         UseXmmI2F = true;
1563       } else {
1564         UseXmmI2F = false;
1565       }
1566     }
1567     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1568       if (supports_sse4a()) {
1569         UseXmmI2D = true;
1570       } else {
1571         UseXmmI2D = false;
1572       }
1573     }
1574     if (supports_sse4_2()) {
1575       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1576         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1577       }
1578     } else {
      if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1580         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1581       }
1582       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1583     }
1584 
1585     // some defaults for AMD family 15h
1586     if (cpu_family() == 0x15) {
1587       // On family 15h processors default is no sw prefetch
1588       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1589         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1590       }
1591       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1592       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1593         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1594       }
1595       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1596       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1597         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1598       }
1599       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1600         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1601       }
1602     }
1603 
1604 #ifdef COMPILER2
1605     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1606       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1607       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1608     }
1609 #endif // COMPILER2
1610 
1611     // Some defaults for AMD family >= 17h && Hygon family 18h
1612     if (cpu_family() >= 0x17) {
1613       // On family >=17h processors use XMM and UnalignedLoadStores
1614       // for Array Copy
1615       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1616         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1617       }
1618       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1619         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1620       }
1621 #ifdef COMPILER2
1622       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1623         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1624       }
1625 #endif
1626     }
1627   }
1628 
1629   if (is_intel()) { // Intel cpus specific settings
1630     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1631       UseStoreImmI16 = false; // don't use it on Intel cpus
1632     }
1633     if (cpu_family() == 6 || cpu_family() == 15) {
1634       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1635         // Use it on all Intel cpus starting from PentiumPro
1636         UseAddressNop = true;
1637       }
1638     }
1639     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1640       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1641     }
1642     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1643       if (supports_sse3()) {
1644         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1645       } else {
1646         UseXmmRegToRegMoveAll = false;
1647       }
1648     }
1649     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1650 #ifdef COMPILER2
1651       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus apply the following optimization:
        // don't align the beginning of a loop if there are enough
        // instructions left in the current fetch line (OptoLoopAlignment)
        // (NumberOfLoopInstrToAlign is defined in c2_globals.hpp) or if
        // the padding is big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1660         MaxLoopPad = 11;
1661       }
1662 #endif // COMPILER2
1663 
1664       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1665         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1666       }
1667       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1668         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1669           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1670         }
1671       }
1672       if (supports_sse4_2()) {
1673         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1674           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1675         }
1676       } else {
        if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1678           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1679         }
1680         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1681       }
1682     }
1683     if (is_atom_family() || is_knights_family()) {
1684 #ifdef COMPILER2
1685       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1686         OptoScheduling = true;
1687       }
1688 #endif
1689       if (supports_sse4_2()) { // Silvermont
1690         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1691           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1692         }
1693       }
1694       if (FLAG_IS_DEFAULT(UseIncDec)) {
1695         FLAG_SET_DEFAULT(UseIncDec, false);
1696       }
1697     }
1698     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1699       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1700     }
1701 #ifdef COMPILER2
1702     if (UseAVX > 2) {
1703       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1704           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1705            ArrayOperationPartialInlineSize != 0 &&
1706            ArrayOperationPartialInlineSize != 16 &&
1707            ArrayOperationPartialInlineSize != 32 &&
1708            ArrayOperationPartialInlineSize != 64)) {
1709         int inline_size = 0;
1710         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1711           inline_size = 64;
1712         } else if (MaxVectorSize >= 32) {
1713           inline_size = 32;
1714         } else if (MaxVectorSize >= 16) {
1715           inline_size = 16;
1716         }
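        // For example, with MaxVectorSize == 64 but a non-zero
        // AVX3Threshold, partial inlining falls back to 32-byte operations.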
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
1719         }
1720         ArrayOperationPartialInlineSize = inline_size;
1721       }
1722 
1723       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1724         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1725         if (ArrayOperationPartialInlineSize) {
          warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize (" INTX_FORMAT ")", MaxVectorSize);
1727         } else {
          warning("Setting ArrayOperationPartialInlineSize to " INTX_FORMAT, ArrayOperationPartialInlineSize);
1729         }
1730       }
1731     }
1732 #endif
1733   }
1734 
1735 #ifdef COMPILER2
1736   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1737     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1738       OptimizeFill = false;
1739     }
1740   }
1741 #endif
1742 
1743 #ifdef _LP64
1744   if (UseSSE42Intrinsics) {
1745     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1746       UseVectorizedMismatchIntrinsic = true;
1747     }
1748   } else if (UseVectorizedMismatchIntrinsic) {
1749     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1750       warning("vectorizedMismatch intrinsics are not available on this CPU");
1751     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1752   }
1753   if (UseAVX >= 2) {
1754     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1755   } else if (UseVectorizedHashCodeIntrinsic) {
1756     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1757       warning("vectorizedHashCode intrinsics are not available on this CPU");
1758     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1759   }
1760 #else
1761   if (UseVectorizedMismatchIntrinsic) {
1762     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1763       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1764     }
1765     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1766   }
1767   if (UseVectorizedHashCodeIntrinsic) {
1768     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1769       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1770     }
1771     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1772   }
1773 #endif // _LP64
1774 
  // Use the count leading zeros instruction if available.
1776   if (supports_lzcnt()) {
1777     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1778       UseCountLeadingZerosInstruction = true;
1779     }
  } else if (UseCountLeadingZerosInstruction) {
1781     warning("lzcnt instruction is not available on this CPU");
1782     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1783   }
1784 
  // Use the count trailing zeros instruction if available.
1786   if (supports_bmi1()) {
1787     // tzcnt does not require VEX prefix
1788     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1789       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1790         // Don't use tzcnt if BMI1 is switched off on command line.
1791         UseCountTrailingZerosInstruction = false;
1792       } else {
1793         UseCountTrailingZerosInstruction = true;
1794       }
1795     }
1796   } else if (UseCountTrailingZerosInstruction) {
1797     warning("tzcnt instruction is not available on this CPU");
1798     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1799   }
1800 
1801   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1802   // VEX prefix is generated only when AVX > 0.
1803   if (supports_bmi1() && supports_avx()) {
1804     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1805       UseBMI1Instructions = true;
1806     }
1807   } else if (UseBMI1Instructions) {
1808     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1809     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1810   }
1811 
1812   if (supports_bmi2() && supports_avx()) {
1813     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1814       UseBMI2Instructions = true;
1815     }
1816   } else if (UseBMI2Instructions) {
1817     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1818     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1819   }
1820 
1821   // Use population count instruction if available.
1822   if (supports_popcnt()) {
1823     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1824       UsePopCountInstruction = true;
1825     }
1826   } else if (UsePopCountInstruction) {
1827     warning("POPCNT instruction is not available on this CPU");
1828     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1829   }
1830 
1831   // Use fast-string operations if available.
1832   if (supports_erms()) {
1833     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1834       UseFastStosb = true;
1835     }
1836   } else if (UseFastStosb) {
1837     warning("fast-string operations are not available on this CPU");
1838     FLAG_SET_DEFAULT(UseFastStosb, false);
1839   }
1840 
  // For AMD processors, use XMM/YMM MOVDQU instructions
  // for object initialization by default.
1843   if (is_amd() && cpu_family() >= 0x19) {
1844     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1845       UseFastStosb = false;
1846     }
1847   }
1848 
1849 #ifdef COMPILER2
1850   if (is_intel() && MaxVectorSize > 16) {
1851     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1852       UseFastStosb = false;
1853     }
1854   }
1855 #endif
1856 
1857   // Use XMM/YMM MOVDQU instruction for Object Initialization
1858   if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1859     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1860       UseXMMForObjInit = true;
1861     }
1862   } else if (UseXMMForObjInit) {
1863     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1864     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1865   }
1866 
1867 #ifdef COMPILER2
1868   if (FLAG_IS_DEFAULT(AlignVector)) {
1869     // Modern processors allow misaligned memory operations for vectors.
1870     AlignVector = !UseUnalignedLoadStores;
1871   }
1872 #endif // COMPILER2
1873 
1874   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1875     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1876       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1877     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1878       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1879     }
1880   }
1881 
1882   // Allocation prefetch settings
1883   int cache_line_size = checked_cast<int>(prefetch_data_size());
1884   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1885       (cache_line_size > AllocatePrefetchStepSize)) {
1886     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1887   }
1888 
1889   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1890     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1891     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
      warning("AllocatePrefetchDistance is set to 0 which disables prefetching. Ignoring AllocatePrefetchStyle flag.");
1893     }
1894     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1895   }
1896 
1897   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1898     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1899     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1900   }
1901 
1902   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1903     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1904         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1905       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1906     }
1907 #ifdef COMPILER2
1908     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1909       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1910     }
1911 #endif
1912   }
1913 
1914   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1915 #ifdef COMPILER2
1916     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1917       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1918     }
1919 #endif
1920   }
1921 
1922 #ifdef _LP64
1923   // Prefetch settings
1924 
1925   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1926   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1927   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1928   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1929 
1930   // gc copy/scan is disabled if prefetchw isn't supported, because
1931   // Prefetch::write emits an inlined prefetchw on Linux.
1932   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
  // The prefetcht0 instruction used instead works on both amd64 and em64t.
1934 
1935   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1936     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1937   }
1938   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1939     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1940   }
1941 #endif
1942 
  if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
      (cache_line_size > ContendedPaddingWidth)) {
    ContendedPaddingWidth = cache_line_size;
  }
1946 
1947   // This machine allows unaligned memory accesses
1948   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1949     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1950   }
1951 
1952 #ifndef PRODUCT
1953   if (log_is_enabled(Info, os, cpu)) {
1954     LogStream ls(Log(os, cpu)::info());
1955     outputStream* log = &ls;
    log->print_cr("Logical CPUs per package: %u",
1957                   logical_processors_per_package());
1958     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1959     log->print("UseSSE=%d", UseSSE);
1960     if (UseAVX > 0) {
1961       log->print("  UseAVX=%d", UseAVX);
1962     }
1963     if (UseAES) {
1964       log->print("  UseAES=1");
1965     }
1966 #ifdef COMPILER2
1967     if (MaxVectorSize > 0) {
1968       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1969     }
1970 #endif
1971     log->cr();
1972     log->print("Allocation");
1973     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1974       log->print_cr(": no prefetching");
1975     } else {
1976       log->print(" prefetching: ");
1977       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1978         log->print("PREFETCHW");
1979       } else if (UseSSE >= 1) {
1980         if (AllocatePrefetchInstr == 0) {
1981           log->print("PREFETCHNTA");
1982         } else if (AllocatePrefetchInstr == 1) {
1983           log->print("PREFETCHT0");
1984         } else if (AllocatePrefetchInstr == 2) {
1985           log->print("PREFETCHT2");
1986         } else if (AllocatePrefetchInstr == 3) {
1987           log->print("PREFETCHW");
1988         }
1989       }
1990       if (AllocatePrefetchLines > 1) {
1991         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1992       } else {
1993         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1994       }
1995     }
1996 
1997     if (PrefetchCopyIntervalInBytes > 0) {
1998       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1999     }
2000     if (PrefetchScanIntervalInBytes > 0) {
2001       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
2002     }
2003     if (ContendedPaddingWidth > 0) {
2004       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
2005     }
2006   }
2007 #endif // !PRODUCT
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
    FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
    FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
2014 }
2015 
2016 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2017   VirtualizationType vrt = VM_Version::get_detected_virtualization();
2018   if (vrt == XenHVM) {
2019     st->print_cr("Xen hardware-assisted virtualization detected");
2020   } else if (vrt == KVM) {
2021     st->print_cr("KVM virtualization detected");
2022   } else if (vrt == VMWare) {
2023     st->print_cr("VMWare virtualization detected");
2024     VirtualizationSupport::print_virtualization_info(st);
2025   } else if (vrt == HyperV) {
2026     st->print_cr("Hyper-V virtualization detected");
2027   } else if (vrt == HyperVRole) {
2028     st->print_cr("Hyper-V role detected");
2029   }
2030 }
2031 
2032 bool VM_Version::compute_has_intel_jcc_erratum() {
2033   if (!is_intel_family_core()) {
2034     // Only Intel CPUs are affected.
2035     return false;
2036   }
  // The table of affected CPUs below is based on the following document released by Intel:
2038   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
2039   switch (_model) {
2040   case 0x8E:
2041     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2042     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
2043     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
2044     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
2045     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
2046     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2047     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2048     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
2049     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2050     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
2051   case 0x4E:
2052     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
2053     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
2054     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
2055     return _stepping == 0x3;
2056   case 0x55:
2057     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2058     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2059     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2060     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2061     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2062     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2063     return _stepping == 0x4 || _stepping == 0x7;
2064   case 0x5E:
2065     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2066     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2067     return _stepping == 0x3;
2068   case 0x9E:
2069     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2070     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2071     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2072     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2073     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2074     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2075     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2076     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2077     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2078     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2079     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2080     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
2082     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2083     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2084   case 0xA5:
2085     // Not in Intel documentation.
2086     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2087     return true;
2088   case 0xA6:
2089     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2090     return _stepping == 0x0;
2091   case 0xAE:
2092     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2093     return _stepping == 0xA;
2094   default:
    // If we are running on another Intel machine not recognized in the table, we are okay.
2096     return false;
2097   }
2098 }
2099 
2100 // On Xen, the cpuid instruction returns
2101 //  eax / registers[0]: Version of Xen
2102 //  ebx / registers[1]: chars 'XenV'
2103 //  ecx / registers[2]: chars 'MMXe'
2104 //  edx / registers[3]: chars 'nVMM'
2105 //
2106 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2107 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2108 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2109 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2110 //
2111 // more information :
2112 // https://kb.vmware.com/s/article/1009458
2113 //
2114 void VM_Version::check_virtualizations() {
2115   uint32_t registers[4] = {0};
2116   char signature[13] = {0};
2117 
  // Xen cpuid leaves can be found at 0x100-aligned boundaries
  // starting from 0x40000000 up to 0x40010000.
2120   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2121   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2122     detect_virt_stub(leaf, registers);
2123     memcpy(signature, &registers[1], 12);
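    // registers[1..3] hold ebx/ecx/edx; concatenated they form the
    // 12-character hypervisor signature, e.g. "KVMKVMKVM\0\0\0" on KVM.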
2124 
2125     if (strncmp("VMwareVMware", signature, 12) == 0) {
2126       Abstract_VM_Version::_detected_virtualization = VMWare;
2127       // check for extended metrics from guestlib
2128       VirtualizationSupport::initialize();
2129     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2130       Abstract_VM_Version::_detected_virtualization = HyperV;
2131 #ifdef _WINDOWS
2132       // CPUID leaf 0x40000007 is available to the root partition only.
2133       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2134       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2135       detect_virt_stub(0x40000007, registers);
2136       if ((registers[0] != 0x0) ||
2137           (registers[1] != 0x0) ||
2138           (registers[2] != 0x0) ||
2139           (registers[3] != 0x0)) {
2140         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2141       }
2142 #endif
2143     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2144       Abstract_VM_Version::_detected_virtualization = KVM;
2145     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2146       Abstract_VM_Version::_detected_virtualization = XenHVM;
2147     }
2148   }
2149 }
2150 
2151 #ifdef COMPILER2
2152 // Determine if it's running on Cascade Lake using default options.
2153 bool VM_Version::is_default_intel_cascade_lake() {
2154   return FLAG_IS_DEFAULT(UseAVX) &&
2155          FLAG_IS_DEFAULT(MaxVectorSize) &&
2156          UseAVX > 2 &&
2157          is_intel_cascade_lake();
2158 }
2159 #endif
2160 
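// Cascade Lake reuses the Skylake server model (06_55H) and is
// distinguished by stepping >= 5 (see 06_55, stepping 7, in the
// jcc erratum table above).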
2161 bool VM_Version::is_intel_cascade_lake() {
2162   return is_intel_skylake() && _stepping >= 5;
2163 }
2164 
// avx3_threshold() sets the threshold at which 64-byte instructions are used
// for implementing the array copy and clear operations.
// Intel platforms that support the serialize instruction have an improved
// implementation of 64-byte load/stores, so the default threshold is set
// to 0 for these platforms.
2170 int VM_Version::avx3_threshold() {
2171   return (is_intel_family_core() &&
2172           supports_serialize() &&
2173           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2174 }
2175 
2176 #if defined(_LP64)
2177 void VM_Version::clear_apx_test_state() {
2178   clear_apx_test_state_stub();
2179 }
2180 #endif
2181 
2182 static bool _vm_version_initialized = false;
2183 
2184 void VM_Version::initialize() {
2185   ResourceMark rm;
  // Generating this stub must be the FIRST use of the assembler.
2187   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2188   if (stub_blob == nullptr) {
2189     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2190   }
2191   CodeBuffer c(stub_blob);
2192   VM_Version_StubGenerator g(&c);
2193 
2194   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2195                                      g.generate_get_cpu_info());
2196   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2197                                      g.generate_detect_virt());
2198 
2199 #if defined(_LP64)
2200   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2201                                      g.clear_apx_test_state());
2202 #endif
2203   get_processor_features();
2204 
2205   LP64_ONLY(Assembler::precompute_instructions();)
2206 
2207   if (VM_Version::supports_hv()) { // Supports hypervisor
2208     check_virtualizations();
2209   }
2210   _vm_version_initialized = true;
2211 }
2212 
2213 typedef enum {
2214    CPU_FAMILY_8086_8088  = 0,
2215    CPU_FAMILY_INTEL_286  = 2,
2216    CPU_FAMILY_INTEL_386  = 3,
2217    CPU_FAMILY_INTEL_486  = 4,
2218    CPU_FAMILY_PENTIUM    = 5,
2219    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2220    CPU_FAMILY_PENTIUM_4  = 0xF
2221 } FamilyFlag;
2222 
2223 typedef enum {
2224   RDTSCP_FLAG  = 0x08000000, // bit 27
2225   INTEL64_FLAG = 0x20000000  // bit 29
2226 } _featureExtendedEdxFlag;
2227 
2228 typedef enum {
2229    FPU_FLAG     = 0x00000001,
2230    VME_FLAG     = 0x00000002,
2231    DE_FLAG      = 0x00000004,
2232    PSE_FLAG     = 0x00000008,
2233    TSC_FLAG     = 0x00000010,
2234    MSR_FLAG     = 0x00000020,
2235    PAE_FLAG     = 0x00000040,
2236    MCE_FLAG     = 0x00000080,
2237    CX8_FLAG     = 0x00000100,
2238    APIC_FLAG    = 0x00000200,
2239    SEP_FLAG     = 0x00000800,
2240    MTRR_FLAG    = 0x00001000,
2241    PGE_FLAG     = 0x00002000,
2242    MCA_FLAG     = 0x00004000,
2243    CMOV_FLAG    = 0x00008000,
2244    PAT_FLAG     = 0x00010000,
2245    PSE36_FLAG   = 0x00020000,
2246    PSNUM_FLAG   = 0x00040000,
2247    CLFLUSH_FLAG = 0x00080000,
2248    DTS_FLAG     = 0x00200000,
2249    ACPI_FLAG    = 0x00400000,
2250    MMX_FLAG     = 0x00800000,
2251    FXSR_FLAG    = 0x01000000,
2252    SSE_FLAG     = 0x02000000,
2253    SSE2_FLAG    = 0x04000000,
2254    SS_FLAG      = 0x08000000,
2255    HTT_FLAG     = 0x10000000,
2256    TM_FLAG      = 0x20000000
2257 } FeatureEdxFlag;
2258 
2259 static BufferBlob* cpuid_brand_string_stub_blob;
2260 static const int   cpuid_brand_string_stub_size = 550;
2261 
2262 extern "C" {
2263   typedef void (*getCPUIDBrandString_stub_t)(void*);
2264 }
2265 
2266 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2267 
2268 // VM_Version statics
2269 enum {
2270   ExtendedFamilyIdLength_INTEL = 16,
2271   ExtendedFamilyIdLength_AMD   = 24
2272 };
2273 
2274 const size_t VENDOR_LENGTH = 13;
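// Extended brand string buffer: three cpuid leaves x four registers x
// four bytes each, plus a terminating null.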
2275 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2276 static char* _cpu_brand_string = nullptr;
2277 static int64_t _max_qualified_cpu_frequency = 0;
2278 
2279 static int _no_of_threads = 0;
2280 static int _no_of_cores = 0;
2281 
2282 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2283   "8086/8088",
2284   "",
2285   "286",
2286   "386",
2287   "486",
2288   "Pentium",
  "Pentium Pro",   // or Pentium-M/Woodcrest depending on model
2290   "",
2291   "",
2292   "",
2293   "",
2294   "",
2295   "",
2296   "",
2297   "",
2298   "Pentium 4"
2299 };
2300 
2301 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2302   "",
2303   "",
2304   "",
2305   "",
2306   "5x86",
2307   "K5/K6",
2308   "Athlon/AthlonXP",
2309   "",
2310   "",
2311   "",
2312   "",
2313   "",
2314   "",
2315   "",
2316   "",
2317   "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et al.
2319   "",
2320   "",
2321   "",
2322   "",
2323   "",
2324   "",
2325   "Zen"
2326 };
2327 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2328 // September 2013, Vol 3C Table 35-1
2329 const char* const _model_id_pentium_pro[] = {
2330   "",
2331   "Pentium Pro",
2332   "",
2333   "Pentium II model 3",
2334   "",
2335   "Pentium II model 5/Xeon/Celeron",
2336   "Celeron",
2337   "Pentium III/Pentium III Xeon",
2338   "Pentium III/Pentium III Xeon",
2339   "Pentium M model 9",    // Yonah
2340   "Pentium III, model A",
2341   "Pentium III, model B",
2342   "",
2343   "Pentium M model D",    // Dothan
2344   "",
2345   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2346   "",
2347   "",
2348   "",
2349   "",
2350   "",
2351   "",
2352   "Celeron",              // 0x16 Celeron 65nm
2353   "Core 2",               // 0x17 Penryn / Harpertown
2354   "",
2355   "",
2356   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2357   "Atom",                 // 0x1B Z5xx series Silverthorn
2358   "",
2359   "Core 2",               // 0x1D Dunnington (6-core)
2360   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2361   "",
2362   "",
2363   "",
2364   "",
2365   "",
2366   "",
2367   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2368   "",
2369   "",
2370   "",                     // 0x28
2371   "",
2372   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2373   "",
2374   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2375   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2376   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2377   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2378   "",
2379   "",
2380   "",
2381   "",
2382   "",
2383   "",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "Ivy Bridge",           // 0x3a
2389   "",
2390   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2391   "",                     // 0x3d "Next Generation Intel Core Processor"
2392   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2393   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2394   "",
2395   "",
2396   "",
2397   "",
2398   "",
2399   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2400   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2401   nullptr
2402 };
2403 
/* Brand ID is for backward compatibility;
 * newer CPUs use the extended brand string. */
2406 const char* const _brand_id[] = {
2407   "",
2408   "Celeron processor",
2409   "Pentium III processor",
2410   "Intel Pentium III Xeon processor",
2411   "",
2412   "",
2413   "",
2414   "",
2415   "Intel Pentium 4 processor",
2416   nullptr
2417 };
2418 
2419 
2420 const char* const _feature_edx_id[] = {
2421   "On-Chip FPU",
2422   "Virtual Mode Extensions",
2423   "Debugging Extensions",
2424   "Page Size Extensions",
2425   "Time Stamp Counter",
2426   "Model Specific Registers",
2427   "Physical Address Extension",
2428   "Machine Check Exceptions",
2429   "CMPXCHG8B Instruction",
2430   "On-Chip APIC",
2431   "",
2432   "Fast System Call",
2433   "Memory Type Range Registers",
2434   "Page Global Enable",
2435   "Machine Check Architecture",
2436   "Conditional Mov Instruction",
2437   "Page Attribute Table",
2438   "36-bit Page Size Extension",
2439   "Processor Serial Number",
2440   "CLFLUSH Instruction",
2441   "",
2442   "Debug Trace Store feature",
2443   "ACPI registers in MSR space",
2444   "Intel Architecture MMX Technology",
  "Fast Floating Point Save and Restore",
2446   "Streaming SIMD extensions",
2447   "Streaming SIMD extensions 2",
2448   "Self-Snoop",
2449   "Hyper Threading",
2450   "Thermal Monitor",
2451   "",
2452   "Pending Break Enable"
2453 };
2454 
2455 const char* const _feature_extended_edx_id[] = {
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "SYSCALL/SYSRET",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "",
2476   "Execute Disable Bit",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   "RDTSCP",
2484   "",
2485   "Intel 64 Architecture",
2486   "",
2487   ""
2488 };
2489 
2490 const char* const _feature_ecx_id[] = {
2491   "Streaming SIMD Extensions 3",
2492   "PCLMULQDQ",
2493   "64-bit DS Area",
2494   "MONITOR/MWAIT instructions",
2495   "CPL Qualified Debug Store",
2496   "Virtual Machine Extensions",
2497   "Safer Mode Extensions",
2498   "Enhanced Intel SpeedStep technology",
2499   "Thermal Monitor 2",
2500   "Supplemental Streaming SIMD Extensions 3",
2501   "L1 Context ID",
2502   "",
2503   "Fused Multiply-Add",
2504   "CMPXCHG16B",
2505   "xTPR Update Control",
2506   "Perfmon and Debug Capability",
2507   "",
2508   "Process-context identifiers",
2509   "Direct Cache Access",
2510   "Streaming SIMD extensions 4.1",
2511   "Streaming SIMD extensions 4.2",
2512   "x2APIC",
2513   "MOVBE",
2514   "Popcount instruction",
2515   "TSC-Deadline",
2516   "AESNI",
2517   "XSAVE",
2518   "OSXSAVE",
2519   "AVX",
2520   "F16C",
2521   "RDRAND",
2522   ""
2523 };
2524 
2525 const char* const _feature_extended_ecx_id[] = {
2526   "LAHF/SAHF instruction support",
2527   "Core multi-processor legacy mode",
2528   "",
2529   "",
2530   "",
2531   "Advanced Bit Manipulations: LZCNT",
2532   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2533   "Misaligned SSE mode",
2534   "",
2535   "",
2536   "",
2537   "",
2538   "",
2539   "",
2540   "",
2541   "",
2542   "",
2543   "",
2544   "",
2545   "",
2546   "",
2547   "",
2548   "",
2549   "",
2550   "",
2551   "",
2552   "",
2553   "",
2554   "",
2555   "",
2556   "",
2557   ""
2558 };
2559 
2560 void VM_Version::initialize_tsc(void) {
2561   ResourceMark rm;
2562 
2563   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2564   if (cpuid_brand_string_stub_blob == nullptr) {
2565     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2566   }
2567   CodeBuffer c(cpuid_brand_string_stub_blob);
2568   VM_Version_StubGenerator g(&c);
2569   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2570                                    g.generate_getCPUIDBrandString());
2571 }
2572 
2573 const char* VM_Version::cpu_model_description(void) {
2574   uint32_t cpu_family = extended_cpu_family();
2575   uint32_t cpu_model = extended_cpu_model();
2576   const char* model = nullptr;
2577 
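  // Walk the model table up to cpu_model; the nullptr sentinel at the end
  // of _model_id_pentium_pro stops the scan for models past the table.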
2578   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2579     for (uint32_t i = 0; i <= cpu_model; i++) {
2580       model = _model_id_pentium_pro[i];
2581       if (model == nullptr) {
2582         break;
2583       }
2584     }
2585   }
2586   return model;
2587 }
2588 
2589 const char* VM_Version::cpu_brand_string(void) {
2590   if (_cpu_brand_string == nullptr) {
2591     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2592     if (nullptr == _cpu_brand_string) {
2593       return nullptr;
2594     }
2595     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2596     if (ret_val != OS_OK) {
2597       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2598       _cpu_brand_string = nullptr;
2599     }
2600   }
2601   return _cpu_brand_string;
2602 }
2603 
2604 const char* VM_Version::cpu_brand(void) {
2605   const char*  brand  = nullptr;
2606 
2607   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2608     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
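    // Scan forward to entry brand_num; the nullptr sentinel at the end
    // of _brand_id stops the scan if brand_num is past the table.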
2609     brand = _brand_id[0];
2610     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2611       brand = _brand_id[i];
2612     }
2613   }
2614   return brand;
2615 }
2616 
2617 bool VM_Version::cpu_is_em64t(void) {
2618   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2619 }
2620 
2621 bool VM_Version::is_netburst(void) {
2622   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2623 }
2624 
2625 bool VM_Version::supports_tscinv_ext(void) {
2626   if (!supports_tscinv_bit()) {
2627     return false;
2628   }
2629 
2630   if (is_intel()) {
2631     return true;
2632   }
2633 
2634   if (is_amd()) {
2635     return !is_amd_Barcelona();
2636   }
2637 
2638   if (is_hygon()) {
2639     return true;
2640   }
2641 
2642   return false;
2643 }
2644 
2645 void VM_Version::resolve_cpu_information_details(void) {
2646 
  // In the future we want to base this information on proper cpu and
  // cache topology enumeration, such as the Intel 64 Architecture
  // Processor Topology Enumeration, which supports system cpu and cache
  // topology enumeration using either x2APIC ids or initial APIC ids.

  // Currently we only make rough estimates, which will not necessarily
  // reflect the exact configuration of the system.
2655 
2656   // this is the number of logical hardware threads
2657   // visible to the operating system
2658   _no_of_threads = os::processor_count();
2659 
2660   // find out number of threads per cpu package
2661   int threads_per_package = threads_per_core() * cores_per_cpu();
2662 
  // use the number of threads visible to the process to guess the number of sockets
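  // e.g. 32 visible threads on cpus reporting 2 threads/core and
  // 8 cores/package gives 32 / (2 * 8) = 2 sockets.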
2664   _no_of_sockets = _no_of_threads / threads_per_package;
2665 
  // The process might only see a subset of the total number of threads
  // in a single processor package, e.g. under virtualization or
  // resource management. If so, just report one package.
2669   if (0 == _no_of_sockets) {
2670     _no_of_sockets = 1;
2671   }
2672 
2673   // estimate the number of cores
2674   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2675 }
2676 
2677 
2678 const char* VM_Version::cpu_family_description(void) {
2679   int cpu_family_id = extended_cpu_family();
2680   if (is_amd()) {
2681     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2682       return _family_id_amd[cpu_family_id];
2683     }
2684   }
2685   if (is_intel()) {
2686     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2687       return cpu_model_description();
2688     }
2689     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2690       return _family_id_intel[cpu_family_id];
2691     }
2692   }
2693   if (is_hygon()) {
2694     return "Dhyana";
2695   }
2696   return "Unknown x86";
2697 }
2698 
2699 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2700   assert(buf != nullptr, "buffer is null!");
2701   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2702 
2703   const char* cpu_type = nullptr;
2704   const char* x64 = nullptr;
2705 
2706   if (is_intel()) {
2707     cpu_type = "Intel";
2708     x64 = cpu_is_em64t() ? " Intel64" : "";
2709   } else if (is_amd()) {
2710     cpu_type = "AMD";
2711     x64 = cpu_is_em64t() ? " AMD64" : "";
2712   } else if (is_hygon()) {
2713     cpu_type = "Hygon";
2714     x64 = cpu_is_em64t() ? " AMD64" : "";
2715   } else {
2716     cpu_type = "Unknown x86";
2717     x64 = cpu_is_em64t() ? " x86_64" : "";
2718   }
2719 
2720   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2721     cpu_type,
2722     cpu_family_description(),
2723     supports_ht() ? " (HT)" : "",
2724     supports_sse3() ? " SSE3" : "",
2725     supports_ssse3() ? " SSSE3" : "",
2726     supports_sse4_1() ? " SSE4.1" : "",
2727     supports_sse4_2() ? " SSE4.2" : "",
2728     supports_sse4a() ? " SSE4A" : "",
2729     is_netburst() ? " Netburst" : "",
2730     is_intel_family_core() ? " Core" : "",
2731     x64);
2732 
2733   return OS_OK;
2734 }
2735 
2736 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2737   assert(buf != nullptr, "buffer is null!");
2738   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2739   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2740 
2741   // invoke newly generated asm code to fetch CPU Brand String
2742   getCPUIDBrandString_stub(&_cpuid_info);
2743 
2744   // fetch results into buffer
2745   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2746   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2747   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2748   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2749   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2750   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2751   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2752   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2753   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2754   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2755   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2756   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2757 
2758   return OS_OK;
2759 }
2760 
2761 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2762   guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not large enough!");
2764 
2765   unsigned int flag = 0;
2766   unsigned int fi = 0;
2767   size_t       written = 0;
2768   const char*  prefix = "";
2769 
2770 #define WRITE_TO_BUF(string)                                                          \
2771   {                                                                                   \
2772     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2773     if (res < 0) {                                                                    \
2774       return buf_len - 1;                                                             \
2775     }                                                                                 \
2776     written += res;                                                                   \
2777     if (prefix[0] == '\0') {                                                          \
2778       prefix = ", ";                                                                  \
2779     }                                                                                 \
2780   }
2781 
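  // Scan feature bits 0..29 of each cpuid word; entries with empty
  // names are reserved bits and are skipped.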
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2783     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2784       continue; /* no hyperthreading */
2785     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2786       continue; /* no fast system call */
2787     }
2788     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2789       WRITE_TO_BUF(_feature_edx_id[fi]);
2790     }
2791   }
2792 
2793   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2794     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2795       WRITE_TO_BUF(_feature_ecx_id[fi]);
2796     }
2797   }
2798 
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2800     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2801       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2802     }
2803   }
2804 
2805   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2806     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2807       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2808     }
2809   }
2810 
  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }
2814 
2815   return written;
2816 }
2817 
2818 /**
2819  * Write a detailed description of the cpu to a given buffer, including
2820  * feature set.
2821  */
2822 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2823   assert(buf != nullptr, "buffer is null!");
2824   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2825 
2826   static const char* unknown = "<unknown>";
2827   char               vendor_id[VENDOR_LENGTH];
2828   const char*        family = nullptr;
2829   const char*        model = nullptr;
2830   const char*        brand = nullptr;
2831   int                outputLen = 0;
2832 
2833   family = cpu_family_description();
2834   if (family == nullptr) {
2835     family = unknown;
2836   }
2837 
2838   model = cpu_model_description();
2839   if (model == nullptr) {
2840     model = unknown;
2841   }
2842 
2843   brand = cpu_brand_string();
2844 
2845   if (brand == nullptr) {
2846     brand = cpu_brand();
2847     if (brand == nullptr) {
2848       brand = unknown;
2849     }
2850   }
2851 
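  // The 12-byte cpuid vendor string is ebx:edx:ecx, hence the
  // 0, 2, 1 field ordering below.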
2852   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2853   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2854   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2855   vendor_id[VENDOR_LENGTH-1] = '\0';
2856 
2857   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2858     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2859     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2860     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2861     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2862     "Supports: ",
2863     brand,
2864     vendor_id,
2865     family,
2866     extended_cpu_family(),
2867     model,
2868     extended_cpu_model(),
2869     cpu_stepping(),
2870     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2871     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2872     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2873     _cpuid_info.std_cpuid1_eax.value,
2874     _cpuid_info.std_cpuid1_ebx.value,
2875     _cpuid_info.std_cpuid1_ecx.value,
2876     _cpuid_info.std_cpuid1_edx.value,
2877     _cpuid_info.ext_cpuid1_eax,
2878     _cpuid_info.ext_cpuid1_ebx,
2879     _cpuid_info.ext_cpuid1_ecx,
2880     _cpuid_info.ext_cpuid1_edx);
2881 
2882   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2883     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2884     return OS_ERR;
2885   }
2886 
2887   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2888 
2889   return OS_OK;
2890 }
2891 
2892 
2893 // Fill in Abstract_VM_Version statics
2894 void VM_Version::initialize_cpu_information() {
2895   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2896   assert(!_initialized, "shouldn't be initialized yet");
2897   resolve_cpu_information_details();
2898 
2899   // initialize cpu_name and cpu_desc
2900   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2901   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2902   _initialized = true;
2903 }
2904 
2905 /**
2906  *  For information about extracting the frequency from the cpu brand string, please see:
2907  *
2908  *    Intel Processor Identification and the CPUID Instruction
2909  *    Application Note 485
2910  *    May 2012
2911  *
2912  * The return value is the frequency in Hz.
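 *
 *  For example, a brand string ending in "3.60GHz" yields
 *  3 * 10^9 + 6 * 10^8 + 0 * 10^7 = 3.6 GHz.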
2913  */
2914 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2915   const char* const brand_string = cpu_brand_string();
2916   if (brand_string == nullptr) {
2917     return 0;
2918   }
2919   const int64_t MEGA = 1000000;
2920   int64_t multiplier = 0;
2921   int64_t frequency = 0;
2922   uint8_t idx = 0;
2923   // The brand string buffer is at most 48 bytes.
2924   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2925   for (; idx < 48-2; ++idx) {
2926     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2927     // Search brand string for "yHz" where y is M, G, or T.
2928     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2929       if (brand_string[idx] == 'M') {
2930         multiplier = MEGA;
2931       } else if (brand_string[idx] == 'G') {
2932         multiplier = MEGA * 1000;
2933       } else if (brand_string[idx] == 'T') {
2934         multiplier = MEGA * MEGA;
2935       }
2936       break;
2937     }
2938   }
2939   if (multiplier > 0) {
2940     // Compute frequency (in Hz) from brand string.
2941     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2942       frequency =  (brand_string[idx-4] - '0') * multiplier;
2943       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2944       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2945     } else { // format is "xxxx"
2946       frequency =  (brand_string[idx-4] - '0') * 1000;
2947       frequency += (brand_string[idx-3] - '0') * 100;
2948       frequency += (brand_string[idx-2] - '0') * 10;
2949       frequency += (brand_string[idx-1] - '0');
2950       frequency *= multiplier;
2951     }
2952   }
2953   return frequency;
2954 }
2955 
2956 
2957 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2958   if (_max_qualified_cpu_frequency == 0) {
2959     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2960   }
2961   return _max_qualified_cpu_frequency;
2962 }
2963 
2964 uint64_t VM_Version::CpuidInfo::feature_flags() const {
2965   uint64_t result = 0;
2966   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2967     result |= CPU_CX8;
2968   if (std_cpuid1_edx.bits.cmov != 0)
2969     result |= CPU_CMOV;
2970   if (std_cpuid1_edx.bits.clflush != 0)
2971     result |= CPU_FLUSH;
2972 #ifdef _LP64
2973   // clflush should always be available on x86_64
2974   // if not we are in real trouble because we rely on it
2975   // to flush the code cache.
2976   assert((result & CPU_FLUSH) != 0, "clflush should be available");
2977 #endif
2978   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2979       ext_cpuid1_edx.bits.fxsr != 0))
2980     result |= CPU_FXSR;
2981   // HT flag is set for multi-core processors also.
2982   if (threads_per_core() > 1)
2983     result |= CPU_HT;
2984   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2985       ext_cpuid1_edx.bits.mmx != 0))
2986     result |= CPU_MMX;
2987   if (std_cpuid1_edx.bits.sse != 0)
2988     result |= CPU_SSE;
2989   if (std_cpuid1_edx.bits.sse2 != 0)
2990     result |= CPU_SSE2;
2991   if (std_cpuid1_ecx.bits.sse3 != 0)
2992     result |= CPU_SSE3;
2993   if (std_cpuid1_ecx.bits.ssse3 != 0)
2994     result |= CPU_SSSE3;
2995   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2996     result |= CPU_SSE4_1;
2997   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2998     result |= CPU_SSE4_2;
2999   if (std_cpuid1_ecx.bits.popcnt != 0)
3000     result |= CPU_POPCNT;
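       // APX is usable only if the CPU enumerates it and the OS has enabled
       // the extended GPR state component in XCR0.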
3001   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
3002       xem_xcr0_eax.bits.apx_f != 0) {
3003     result |= CPU_APX_F;
3004   }
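       // AVX is usable only if the CPU supports it, OSXSAVE is enabled, and
       // the OS saves/restores the SSE and YMM state components (XCR0).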
3005   if (std_cpuid1_ecx.bits.avx != 0 &&
3006       std_cpuid1_ecx.bits.osxsave != 0 &&
3007       xem_xcr0_eax.bits.sse != 0 &&
3008       xem_xcr0_eax.bits.ymm != 0) {
3009     result |= CPU_AVX;
3010     result |= CPU_VZEROUPPER;
3011     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
3012       result |= CPU_SHA512;
3013     if (std_cpuid1_ecx.bits.f16c != 0)
3014       result |= CPU_F16C;
3015     if (sef_cpuid7_ebx.bits.avx2 != 0) {
3016       result |= CPU_AVX2;
3017       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
3018         result |= CPU_AVX_IFMA;
3019     }
3020     if (sef_cpuid7_ecx.bits.gfni != 0)
3021       result |= CPU_GFNI;
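         // AVX-512 additionally requires the OS to manage the opmask and ZMM
         // state components in XCR0.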
3022     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
3023         xem_xcr0_eax.bits.opmask != 0 &&
3024         xem_xcr0_eax.bits.zmm512 != 0 &&
3025         xem_xcr0_eax.bits.zmm32 != 0) {
3026       result |= CPU_AVX512F;
3027       if (sef_cpuid7_ebx.bits.avx512cd != 0)
3028         result |= CPU_AVX512CD;
3029       if (sef_cpuid7_ebx.bits.avx512dq != 0)
3030         result |= CPU_AVX512DQ;
3031       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
3032         result |= CPU_AVX512_IFMA;
3033       if (sef_cpuid7_ebx.bits.avx512pf != 0)
3034         result |= CPU_AVX512PF;
3035       if (sef_cpuid7_ebx.bits.avx512er != 0)
3036         result |= CPU_AVX512ER;
3037       if (sef_cpuid7_ebx.bits.avx512bw != 0)
3038         result |= CPU_AVX512BW;
3039       if (sef_cpuid7_ebx.bits.avx512vl != 0)
3040         result |= CPU_AVX512VL;
3041       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
3042         result |= CPU_AVX512_VPOPCNTDQ;
3043       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
3044         result |= CPU_AVX512_VPCLMULQDQ;
3045       if (sef_cpuid7_ecx.bits.vaes != 0)
3046         result |= CPU_AVX512_VAES;
3047       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
3048         result |= CPU_AVX512_VNNI;
3049       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
3050         result |= CPU_AVX512_BITALG;
3051       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
3052         result |= CPU_AVX512_VBMI;
3053       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
3054         result |= CPU_AVX512_VBMI2;
3055     }
3056   }
3057   if (std_cpuid1_ecx.bits.hv != 0)
3058     result |= CPU_HV;
3059   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3060     result |= CPU_BMI1;
3061   if (std_cpuid1_edx.bits.tsc != 0)
3062     result |= CPU_TSC;
3063   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3064     result |= CPU_TSCINV_BIT;
3065   if (std_cpuid1_ecx.bits.aes != 0)
3066     result |= CPU_AES;
3067   if (sef_cpuid7_ebx.bits.erms != 0)
3068     result |= CPU_ERMS;
3069   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3070     result |= CPU_FSRM;
3071   if (std_cpuid1_ecx.bits.clmul != 0)
3072     result |= CPU_CLMUL;
3073   if (sef_cpuid7_ebx.bits.rtm != 0)
3074     result |= CPU_RTM;
3075   if (sef_cpuid7_ebx.bits.adx != 0)
3076     result |= CPU_ADX;
3077   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3078     result |= CPU_BMI2;
3079   if (sef_cpuid7_ebx.bits.sha != 0)
3080     result |= CPU_SHA;
3081   if (std_cpuid1_ecx.bits.fma != 0)
3082     result |= CPU_FMA;
3083   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3084     result |= CPU_FLUSHOPT;
3085   if (ext_cpuid1_edx.bits.rdtscp != 0)
3086     result |= CPU_RDTSCP;
3087   if (sef_cpuid7_ecx.bits.rdpid != 0)
3088     result |= CPU_RDPID;
3089 
3090   // AMD|Hygon features.
3091   if (is_amd_family()) {
3092     if ((ext_cpuid1_edx.bits.tdnow != 0) ||
3093         (ext_cpuid1_ecx.bits.prefetchw != 0))
3094       result |= CPU_3DNOW_PREFETCH;
3095     if (ext_cpuid1_ecx.bits.lzcnt != 0)
3096       result |= CPU_LZCNT;
3097     if (ext_cpuid1_ecx.bits.sse4a != 0)
3098       result |= CPU_SSE4A;
3099   }
3100 
3101   // Intel features.
3102   if (is_intel()) {
3103     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3104       result |= CPU_LZCNT;
3105     }
3106     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3107       result |= CPU_3DNOW_PREFETCH;
3108     }
3109     if (sef_cpuid7_ebx.bits.clwb != 0) {
3110       result |= CPU_CLWB;
3111     }
3112     if (sef_cpuid7_edx.bits.serialize != 0)
3113       result |= CPU_SERIALIZE;
3114   }
3115 
3116   // ZX features.
3117   if (is_zx()) {
3118     if (ext_cpuid1_ecx.bits.lzcnt != 0) {
3119       result |= CPU_LZCNT;
3120     }
3121     if (ext_cpuid1_ecx.bits.prefetchw != 0) {
3122       result |= CPU_3DNOW_PREFETCH;
3123     }
3124   }
3125 
3126   // Protection key features.
3127   if (sef_cpuid7_ecx.bits.pku != 0) {
3128     result |= CPU_PKU;
3129   }
3130   if (sef_cpuid7_ecx.bits.ospke != 0) {
3131     result |= CPU_OSPKE;
3132   }
3133 
3134   // Control flow enforcement (CET) features.
3135   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3136     result |= CPU_CET_SS;
3137   }
3138   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3139     result |= CPU_CET_IBT;
3140   }
3141 
3142   // Composite features.
3143   if (supports_tscinv_bit() &&
3144       ((is_amd_family() && !is_amd_Barcelona()) ||
3145        is_intel_tsc_synched_at_init())) {
3146     result |= CPU_TSCINV;
3147   }
3148 
3149   return result;
3150 }
3151 
3152 bool VM_Version::os_supports_avx_vectors() {
3153   bool retVal = false;
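       // Number of vector registers written by the test stub and checked
       // below: 2 on 32-bit, 4 on 64-bit.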
3154   int nreg = 2 LP64_ONLY(+2);
3155   if (supports_evex()) {
3156     // Verify that the OS saves/restores all bits of EVEX registers
3157     // during signal processing.
3158     retVal = true;
3159     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3160       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3161         retVal = false;
3162         break;
3163       }
3164     }
3165   } else if (supports_avx()) {
3166     // Verify that the OS saves/restores all bits of AVX registers
3167     // during signal processing.
3168     retVal = true;
3169     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3170       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3171         retVal = false;
3172         break;
3173       }
3174     }
3175     // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen
3176     if (retVal == false) {
3177       // Verify that the OS saves/restores all bits of EVEX registers
3178       // during signal processing.
3179       retVal = true;
3180       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3181         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3182           retVal = false;
3183           break;
3184         }
3185       }
3186     }
3187   }
3188   return retVal;
3189 }
3190 
3191 bool VM_Version::os_supports_apx_egprs() {
3192   if (!supports_apx_f()) {
3193     return false;
3194   }
3195   // Enable APX support for product builds after
3196   // completion of planned features listed in JDK-8329030.
3197 #if !defined(PRODUCT)
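       // apx_save[] was filled from extended GPRs by the test stub; any
       // mismatch with egpr_test_value() means APX state was not preserved.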
3198   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3199       _cpuid_info.apx_save[1] != egpr_test_value()) {
3200     return false;
3201   }
3202   return true;
3203 #else
3204   return false;
3205 #endif
3206 }
3207 
3208 uint VM_Version::cores_per_cpu() {
3209   uint result = 1;
3210   if (is_intel()) {
3211     bool supports_topology = supports_processor_topology();
3212     if (supports_topology) {
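           // CPUID leaf 0xB: level 1 (core) reports logical processors per
           // package and level 0 (SMT) reports logical processors per core,
           // so their quotient is the number of cores.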
3213       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3214                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3215     }
3216     if (!supports_topology || result == 0) {
3217       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3218     }
3219   } else if (is_amd_family()) {
3220     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3221   } else if (is_zx()) {
3222     bool supports_topology = supports_processor_topology();
3223     if (supports_topology) {
3224       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3225                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3226     }
3227     if (!supports_topology || result == 0) {
3228       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3229     }
3230   }
3231   return result;
3232 }
3233 
3234 uint VM_Version::threads_per_core() {
3235   uint result = 1;
3236   if (is_intel() && supports_processor_topology()) {
3237     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3238   } else if (is_zx() && supports_processor_topology()) {
3239     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3240   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3241     if (cpu_family() >= 0x17) {
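           // AMD family 17h (Zen) and newer report threads per core directly
           // via CPUID leaf 8000_001Eh.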
3242       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3243     } else {
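           // Legacy path: CPUID leaf 1 EBX gives logical processors per
           // package; divide by the core count to get threads per core.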
3244       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3245                  cores_per_cpu();
3246     }
3247   }
3248   return (result == 0 ? 1 : result);
3249 }
3250 
3251 uint VM_Version::L1_line_size() {
3252   uint result = 0;
3253   if (is_intel()) {
3254     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3255   } else if (is_amd_family()) {
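         // AMD CPUID leaf 8000_0005h reports the line size in bytes directly
         // (no minus-one encoding, hence no +1 here).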
3256     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3257   } else if (is_zx()) {
3258     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3259   }
3260   if (result < 32) // line size not reported by CPUID?
3261     result = 32;   // default to 32 bytes on x86 and x64
3262   return result;
3263 }
3264 
3265 bool VM_Version::is_intel_tsc_synched_at_init() {
3266   if (is_intel_family_core()) {
3267     uint32_t ext_model = extended_cpu_model();
3268     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3269         ext_model == CPU_MODEL_WESTMERE_EP    ||
3270         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3271         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3272       // These EP parts support invariant TSC on systems with <= 2 sockets.
3273       // The EX variants are usually used in > 2-socket systems and likely
3274       // don't synchronize TSCs at initialization.
3275       // Code that uses tsc values must be prepared for them to arbitrarily
3276       // jump forward or backward.
3277       return true;
3278     }
3279   }
3280   return false;
3281 }
3282 
3283 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3284   // Hardware prefetching (distance/size in bytes):
3285   // Pentium 3 -  64 /  32
3286   // Pentium 4 - 256 / 128
3287   // Athlon    -  64 /  32 ????
3288   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3289   // Core      - 128 /  64
3290   //
3291   // Software prefetching (distance in bytes / instruction with best score):
3292   // Pentium 3 - 128 / prefetchnta
3293   // Pentium 4 - 512 / prefetchnta
3294   // Athlon    - 128 / prefetchnta
3295   // Opteron   - 256 / prefetchnta
3296   // Core      - 256 / prefetchnta
3297   // The returned distance is used only when AllocatePrefetchStyle > 0.
3298 
3299   if (is_amd_family()) { // AMD | Hygon
3300     if (supports_sse2()) {
3301       return 256; // Opteron
3302     } else {
3303       return 128; // Athlon
3304     }
3305   } else { // Intel
3306     if (supports_sse3() && cpu_family() == 6) {
3307       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3308         return 192;
3309       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3310 #ifdef _LP64
3311         return 384;
3312 #else
3313         return 320;
3314 #endif
3315       }
3316     }
3317     if (supports_sse2()) {
3318       if (cpu_family() == 6) {
3319         return 256; // Pentium M, Core, Core2
3320       } else {
3321         return 512; // Pentium 4
3322       }
3323     } else {
3324       return 128; // Pentium 3 (and all other old CPUs)
3325     }
3326   }
3327 }
3328 
3329 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3330   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3331   switch (id) {
3332   case vmIntrinsics::_floatToFloat16:
3333   case vmIntrinsics::_float16ToFloat:
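         // Half-precision conversions need hardware support (e.g. F16C);
         // supports_float16() checks the relevant feature flags.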
3334     if (!supports_float16()) {
3335       return false;
3336     }
3337     break;
3338   default:
3339     break;
3340   }
3341   return true;
3342 }