1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/powerOfTwo.hpp"
  42 #include "utilities/virtualizationSupport.hpp"
  43 
  44 int VM_Version::_cpu;
  45 int VM_Version::_model;
  46 int VM_Version::_stepping;
  47 bool VM_Version::_has_intel_jcc_erratum;
  48 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  49 
  50 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  51 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  52 #undef DECLARE_CPU_FEATURE_FLAG
  53 
  54 // Address of instruction which causes SEGV
  55 address VM_Version::_cpuinfo_segv_addr = nullptr;
  56 // Address of instruction after the one which causes SEGV
  57 address VM_Version::_cpuinfo_cont_addr = nullptr;
  58 // Address of instruction which causes APX specific SEGV
  59 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
  60 // Address of instruction after the one which causes APX specific SEGV
  61 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
  62 
  63 static BufferBlob* stub_blob;
  64 static const int stub_size = 2000;
  65 
  66 extern "C" {
  67   typedef void (*get_cpu_info_stub_t)(void*);
  68   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  69   typedef void (*clear_apx_test_state_t)(void);
  70 }
  71 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  72 static detect_virt_stub_t detect_virt_stub = nullptr;
  73 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  74 
  75 bool VM_Version::supports_clflush() {
  76   // clflush should always be available on x86_64
  77   // if not we are in real trouble because we rely on it
  78   // to flush the code cache.
  79   // Unfortunately, Assembler::clflush is currently called as part
  80   // of generation of the code cache flush routine. This happens
  81   // under Universe::init before the processor features are set
  82   // up. Assembler::flush calls this routine to check that clflush
  83   // is allowed. So, we give the caller a free pass if Universe init
  84   // is still in progress.
  85   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  86   return true;
  87 }
  88 
  89 #define CPUID_STANDARD_FN   0x0
  90 #define CPUID_STANDARD_FN_1 0x1
  91 #define CPUID_STANDARD_FN_4 0x4
  92 #define CPUID_STANDARD_FN_B 0xb
  93 
  94 #define CPUID_EXTENDED_FN   0x80000000
  95 #define CPUID_EXTENDED_FN_1 0x80000001
  96 #define CPUID_EXTENDED_FN_2 0x80000002
  97 #define CPUID_EXTENDED_FN_3 0x80000003
  98 #define CPUID_EXTENDED_FN_4 0x80000004
  99 #define CPUID_EXTENDED_FN_7 0x80000007
 100 #define CPUID_EXTENDED_FN_8 0x80000008
 101 
 102 class VM_Version_StubGenerator: public StubCodeGenerator {
 103  public:
 104 
 105   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 106 
 107   address clear_apx_test_state() {
 108 #   define __ _masm->
 109     address start = __ pc();
 110     // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
 111     // handling guarantees that preserved register values post signal handling were
 112     // re-instantiated by operating system and not because they were not modified externally.
 113 
 114     bool save_apx = UseAPX;
 115     VM_Version::set_apx_cpuFeatures();
 116     UseAPX = true;
 117     // EGPR state save/restoration.
 118     __ mov64(r16, 0L);
 119     __ mov64(r31, 0L);
 120     UseAPX = save_apx;
 121     VM_Version::clean_cpuFeatures();
 122     __ ret(0);
 123     return start;
 124   }
 125 
 126   address generate_get_cpu_info() {
 127     // Flags to test CPU type.
 128     const uint32_t HS_EFL_AC = 0x40000;
 129     const uint32_t HS_EFL_ID = 0x200000;
 130     // Values for when we don't have a CPUID instruction.
 131     const int      CPU_FAMILY_SHIFT = 8;
 132     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 133     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 134     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 135 
 136     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 137     Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
 138     Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
 139     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 140 
 141     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 142 #   define __ _masm->
 143 
 144     address start = __ pc();
 145 
 146     //
 147     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 148     //
 149     // rcx and rdx are first and second argument registers on windows
 150 
 151     __ push(rbp);
 152     __ mov(rbp, c_rarg0); // cpuid_info address
 153     __ push(rbx);
 154     __ push(rsi);
 155     __ pushf();          // preserve rbx, and flags
 156     __ pop(rax);
 157     __ push(rax);
 158     __ mov(rcx, rax);
 159     //
 160     // if we are unable to change the AC flag, we have a 386
 161     //
 162     __ xorl(rax, HS_EFL_AC);
 163     __ push(rax);
 164     __ popf();
 165     __ pushf();
 166     __ pop(rax);
 167     __ cmpptr(rax, rcx);
 168     __ jccb(Assembler::notEqual, detect_486);
 169 
 170     __ movl(rax, CPU_FAMILY_386);
 171     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 172     __ jmp(done);
 173 
 174     //
 175     // If we are unable to change the ID flag, we have a 486 which does
 176     // not support the "cpuid" instruction.
 177     //
 178     __ bind(detect_486);
 179     __ mov(rax, rcx);
 180     __ xorl(rax, HS_EFL_ID);
 181     __ push(rax);
 182     __ popf();
 183     __ pushf();
 184     __ pop(rax);
 185     __ cmpptr(rcx, rax);
 186     __ jccb(Assembler::notEqual, detect_586);
 187 
 188     __ bind(cpu486);
 189     __ movl(rax, CPU_FAMILY_486);
 190     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 191     __ jmp(done);
 192 
 193     //
 194     // At this point, we have a chip which supports the "cpuid" instruction
 195     //
 196     __ bind(detect_586);
 197     __ xorl(rax, rax);
 198     __ cpuid();
 199     __ orl(rax, rax);
 200     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 201                                         // value of at least 1, we give up and
 202                                         // assume a 486
 203     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 204     __ movl(Address(rsi, 0), rax);
 205     __ movl(Address(rsi, 4), rbx);
 206     __ movl(Address(rsi, 8), rcx);
 207     __ movl(Address(rsi,12), rdx);
 208 
 209     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 210     __ jccb(Assembler::belowEqual, std_cpuid4);
 211 
 212     //
 213     // cpuid(0xB) Processor Topology
 214     //
 215     __ movl(rax, 0xb);
 216     __ xorl(rcx, rcx);   // Threads level
 217     __ cpuid();
 218 
 219     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 220     __ movl(Address(rsi, 0), rax);
 221     __ movl(Address(rsi, 4), rbx);
 222     __ movl(Address(rsi, 8), rcx);
 223     __ movl(Address(rsi,12), rdx);
 224 
 225     __ movl(rax, 0xb);
 226     __ movl(rcx, 1);     // Cores level
 227     __ cpuid();
 228     __ push(rax);
 229     __ andl(rax, 0x1f);  // Determine if valid topology level
 230     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 231     __ andl(rax, 0xffff);
 232     __ pop(rax);
 233     __ jccb(Assembler::equal, std_cpuid4);
 234 
 235     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 236     __ movl(Address(rsi, 0), rax);
 237     __ movl(Address(rsi, 4), rbx);
 238     __ movl(Address(rsi, 8), rcx);
 239     __ movl(Address(rsi,12), rdx);
 240 
 241     __ movl(rax, 0xb);
 242     __ movl(rcx, 2);     // Packages level
 243     __ cpuid();
 244     __ push(rax);
 245     __ andl(rax, 0x1f);  // Determine if valid topology level
 246     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 247     __ andl(rax, 0xffff);
 248     __ pop(rax);
 249     __ jccb(Assembler::equal, std_cpuid4);
 250 
 251     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 252     __ movl(Address(rsi, 0), rax);
 253     __ movl(Address(rsi, 4), rbx);
 254     __ movl(Address(rsi, 8), rcx);
 255     __ movl(Address(rsi,12), rdx);
 256 
 257     //
 258     // cpuid(0x4) Deterministic cache params
 259     //
 260     __ bind(std_cpuid4);
 261     __ movl(rax, 4);
 262     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 263     __ jccb(Assembler::greater, std_cpuid1);
 264 
 265     __ xorl(rcx, rcx);   // L1 cache
 266     __ cpuid();
 267     __ push(rax);
 268     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 269     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 270     __ pop(rax);
 271     __ jccb(Assembler::equal, std_cpuid1);
 272 
 273     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 274     __ movl(Address(rsi, 0), rax);
 275     __ movl(Address(rsi, 4), rbx);
 276     __ movl(Address(rsi, 8), rcx);
 277     __ movl(Address(rsi,12), rdx);
 278 
 279     //
 280     // Standard cpuid(0x1)
 281     //
 282     __ bind(std_cpuid1);
 283     __ movl(rax, 1);
 284     __ cpuid();
 285     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 286     __ movl(Address(rsi, 0), rax);
 287     __ movl(Address(rsi, 4), rbx);
 288     __ movl(Address(rsi, 8), rcx);
 289     __ movl(Address(rsi,12), rdx);
 290 
 291     //
 292     // Check if OS has enabled XGETBV instruction to access XCR0
 293     // (OSXSAVE feature flag) and CPU supports AVX
 294     //
 295     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 296     __ cmpl(rcx, 0x18000000);
 297     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 298 
 299     //
 300     // XCR0, XFEATURE_ENABLED_MASK register
 301     //
 302     __ xorl(rcx, rcx);   // zero for XCR0 register
 303     __ xgetbv();
 304     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 305     __ movl(Address(rsi, 0), rax);
 306     __ movl(Address(rsi, 4), rdx);
 307 
 308     //
 309     // cpuid(0x7) Structured Extended Features Enumeration Leaf.
 310     //
 311     __ bind(sef_cpuid);
 312     __ movl(rax, 7);
 313     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 314     __ jccb(Assembler::greater, ext_cpuid);
 315     // ECX = 0
 316     __ xorl(rcx, rcx);
 317     __ cpuid();
 318     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 319     __ movl(Address(rsi, 0), rax);
 320     __ movl(Address(rsi, 4), rbx);
 321     __ movl(Address(rsi, 8), rcx);
 322     __ movl(Address(rsi, 12), rdx);
 323 
 324     //
 325     // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
 326     //
 327     __ bind(sefsl1_cpuid);
 328     __ movl(rax, 7);
 329     __ movl(rcx, 1);
 330     __ cpuid();
 331     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 332     __ movl(Address(rsi, 0), rax);
 333     __ movl(Address(rsi, 4), rdx);
 334 
 335     //
 336     // Extended cpuid(0x80000000)
 337     //
 338     __ bind(ext_cpuid);
 339     __ movl(rax, 0x80000000);
 340     __ cpuid();
 341     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 342     __ jcc(Assembler::belowEqual, done);
 343     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 344     __ jcc(Assembler::belowEqual, ext_cpuid1);
 345     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 346     __ jccb(Assembler::belowEqual, ext_cpuid5);
 347     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 348     __ jccb(Assembler::belowEqual, ext_cpuid7);
 349     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 350     __ jccb(Assembler::belowEqual, ext_cpuid8);
 351     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 352     __ jccb(Assembler::below, ext_cpuid8);
 353     //
 354     // Extended cpuid(0x8000001E)
 355     //
 356     __ movl(rax, 0x8000001E);
 357     __ cpuid();
 358     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 359     __ movl(Address(rsi, 0), rax);
 360     __ movl(Address(rsi, 4), rbx);
 361     __ movl(Address(rsi, 8), rcx);
 362     __ movl(Address(rsi,12), rdx);
 363 
 364     //
 365     // Extended cpuid(0x80000008)
 366     //
 367     __ bind(ext_cpuid8);
 368     __ movl(rax, 0x80000008);
 369     __ cpuid();
 370     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 371     __ movl(Address(rsi, 0), rax);
 372     __ movl(Address(rsi, 4), rbx);
 373     __ movl(Address(rsi, 8), rcx);
 374     __ movl(Address(rsi,12), rdx);
 375 
 376     //
 377     // Extended cpuid(0x80000007)
 378     //
 379     __ bind(ext_cpuid7);
 380     __ movl(rax, 0x80000007);
 381     __ cpuid();
 382     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 383     __ movl(Address(rsi, 0), rax);
 384     __ movl(Address(rsi, 4), rbx);
 385     __ movl(Address(rsi, 8), rcx);
 386     __ movl(Address(rsi,12), rdx);
 387 
 388     //
 389     // Extended cpuid(0x80000005)
 390     //
 391     __ bind(ext_cpuid5);
 392     __ movl(rax, 0x80000005);
 393     __ cpuid();
 394     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 395     __ movl(Address(rsi, 0), rax);
 396     __ movl(Address(rsi, 4), rbx);
 397     __ movl(Address(rsi, 8), rcx);
 398     __ movl(Address(rsi,12), rdx);
 399 
 400     //
 401     // Extended cpuid(0x80000001)
 402     //
 403     __ bind(ext_cpuid1);
 404     __ movl(rax, 0x80000001);
 405     __ cpuid();
 406     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 407     __ movl(Address(rsi, 0), rax);
 408     __ movl(Address(rsi, 4), rbx);
 409     __ movl(Address(rsi, 8), rcx);
 410     __ movl(Address(rsi,12), rdx);
 411 
 412     //
 413     // Check if OS has enabled XGETBV instruction to access XCR0
 414     // (OSXSAVE feature flag) and CPU supports APX
 415     //
 416     // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
 417     // and XCRO[19] bit for OS support to save/restore extended GPR state.
 418     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 419     __ movl(rax, 0x200000);
 420     __ andl(rax, Address(rsi, 4));
 421     __ cmpl(rax, 0x200000);
 422     __ jcc(Assembler::notEqual, vector_save_restore);
 423     // check _cpuid_info.xem_xcr0_eax.bits.apx_f
 424     __ movl(rax, 0x80000);
 425     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
 426     __ cmpl(rax, 0x80000);
 427     __ jcc(Assembler::notEqual, vector_save_restore);
 428 
 429 #ifndef PRODUCT
 430     bool save_apx = UseAPX;
 431     VM_Version::set_apx_cpuFeatures();
 432     UseAPX = true;
 433     __ mov64(r16, VM_Version::egpr_test_value());
 434     __ mov64(r31, VM_Version::egpr_test_value());
 435     __ xorl(rsi, rsi);
 436     VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
 437     // Generate SEGV
 438     __ movl(rax, Address(rsi, 0));
 439 
 440     VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
 441     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
 442     __ movq(Address(rsi, 0), r16);
 443     __ movq(Address(rsi, 8), r31);
 444 
 445     UseAPX = save_apx;
 446 #endif
 447     __ bind(vector_save_restore);
 448     //
 449     // Check if OS has enabled XGETBV instruction to access XCR0
 450     // (OSXSAVE feature flag) and CPU supports AVX
 451     //
 452     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 453     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 454     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 455     __ cmpl(rcx, 0x18000000);
 456     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 457 
 458     __ movl(rax, 0x6);
 459     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 460     __ cmpl(rax, 0x6);
 461     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 462 
 463     // we need to bridge farther than imm8, so we use this island as a thunk
 464     __ bind(done);
 465     __ jmp(wrapup);
 466 
 467     __ bind(start_simd_check);
 468     //
 469     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 470     // registers are not restored after a signal processing.
 471     // Generate SEGV here (reference through null)
 472     // and check upper YMM/ZMM bits after it.
 473     //
 474     int saved_useavx = UseAVX;
 475     int saved_usesse = UseSSE;
 476 
 477     // If UseAVX is uninitialized or is set by the user to include EVEX
 478     if (use_evex) {
 479       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 480       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 481       __ movl(rax, 0x10000);
 482       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 483       __ cmpl(rax, 0x10000);
 484       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 485       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 486       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 487       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 488       __ movl(rax, 0xE0);
 489       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 490       __ cmpl(rax, 0xE0);
 491       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 492 
 493       if (FLAG_IS_DEFAULT(UseAVX)) {
 494         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 495         __ movl(rax, Address(rsi, 0));
 496         __ cmpl(rax, 0x50654);              // If it is Skylake
 497         __ jcc(Assembler::equal, legacy_setup);
 498       }
 499       // EVEX setup: run in lowest evex mode
 500       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 501       UseAVX = 3;
 502       UseSSE = 2;
 503 #ifdef _WINDOWS
 504       // xmm5-xmm15 are not preserved by caller on windows
 505       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 506       __ subptr(rsp, 64);
 507       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 508       __ subptr(rsp, 64);
 509       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 510       __ subptr(rsp, 64);
 511       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 512 #endif // _WINDOWS
 513 
 514       // load value into all 64 bytes of zmm7 register
 515       __ movl(rcx, VM_Version::ymm_test_value());
 516       __ movdl(xmm0, rcx);
 517       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 518       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 519       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 520       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 521       VM_Version::clean_cpuFeatures();
 522       __ jmp(save_restore_except);
 523     }
 524 
 525     __ bind(legacy_setup);
 526     // AVX setup
 527     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 528     UseAVX = 1;
 529     UseSSE = 2;
 530 #ifdef _WINDOWS
 531     __ subptr(rsp, 32);
 532     __ vmovdqu(Address(rsp, 0), xmm7);
 533     __ subptr(rsp, 32);
 534     __ vmovdqu(Address(rsp, 0), xmm8);
 535     __ subptr(rsp, 32);
 536     __ vmovdqu(Address(rsp, 0), xmm15);
 537 #endif // _WINDOWS
 538 
 539     // load value into all 32 bytes of ymm7 register
 540     __ movl(rcx, VM_Version::ymm_test_value());
 541 
 542     __ movdl(xmm0, rcx);
 543     __ pshufd(xmm0, xmm0, 0x00);
 544     __ vinsertf128_high(xmm0, xmm0);
 545     __ vmovdqu(xmm7, xmm0);
 546     __ vmovdqu(xmm8, xmm0);
 547     __ vmovdqu(xmm15, xmm0);
 548     VM_Version::clean_cpuFeatures();
 549 
 550     __ bind(save_restore_except);
 551     __ xorl(rsi, rsi);
 552     VM_Version::set_cpuinfo_segv_addr(__ pc());
 553     // Generate SEGV
 554     __ movl(rax, Address(rsi, 0));
 555 
 556     VM_Version::set_cpuinfo_cont_addr(__ pc());
 557     // Returns here after signal. Save xmm0 to check it later.
 558 
 559     // If UseAVX is uninitialized or is set by the user to include EVEX
 560     if (use_evex) {
 561       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 562       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 563       __ movl(rax, 0x10000);
 564       __ andl(rax, Address(rsi, 4));
 565       __ cmpl(rax, 0x10000);
 566       __ jcc(Assembler::notEqual, legacy_save_restore);
 567       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 568       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 569       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 570       __ movl(rax, 0xE0);
 571       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 572       __ cmpl(rax, 0xE0);
 573       __ jcc(Assembler::notEqual, legacy_save_restore);
 574 
 575       if (FLAG_IS_DEFAULT(UseAVX)) {
 576         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 577         __ movl(rax, Address(rsi, 0));
 578         __ cmpl(rax, 0x50654);              // If it is Skylake
 579         __ jcc(Assembler::equal, legacy_save_restore);
 580       }
 581       // EVEX check: run in lowest evex mode
 582       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 583       UseAVX = 3;
 584       UseSSE = 2;
 585       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 586       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 587       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 588       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 589       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 590 
 591 #ifdef _WINDOWS
 592       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 593       __ addptr(rsp, 64);
 594       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 595       __ addptr(rsp, 64);
 596       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 597       __ addptr(rsp, 64);
 598 #endif // _WINDOWS
 599       generate_vzeroupper(wrapup);
 600       VM_Version::clean_cpuFeatures();
 601       UseAVX = saved_useavx;
 602       UseSSE = saved_usesse;
 603       __ jmp(wrapup);
 604    }
 605 
 606     __ bind(legacy_save_restore);
 607     // AVX check
 608     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 609     UseAVX = 1;
 610     UseSSE = 2;
 611     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 612     __ vmovdqu(Address(rsi, 0), xmm0);
 613     __ vmovdqu(Address(rsi, 32), xmm7);
 614     __ vmovdqu(Address(rsi, 64), xmm8);
 615     __ vmovdqu(Address(rsi, 96), xmm15);
 616 
 617 #ifdef _WINDOWS
 618     __ vmovdqu(xmm15, Address(rsp, 0));
 619     __ addptr(rsp, 32);
 620     __ vmovdqu(xmm8, Address(rsp, 0));
 621     __ addptr(rsp, 32);
 622     __ vmovdqu(xmm7, Address(rsp, 0));
 623     __ addptr(rsp, 32);
 624 #endif // _WINDOWS
 625 
 626     generate_vzeroupper(wrapup);
 627     VM_Version::clean_cpuFeatures();
 628     UseAVX = saved_useavx;
 629     UseSSE = saved_usesse;
 630 
 631     __ bind(wrapup);
 632     __ popf();
 633     __ pop(rsi);
 634     __ pop(rbx);
 635     __ pop(rbp);
 636     __ ret(0);
 637 
 638 #   undef __
 639 
 640     return start;
 641   };
 642   void generate_vzeroupper(Label& L_wrapup) {
 643 #   define __ _masm->
 644     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 645     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 646     __ jcc(Assembler::notEqual, L_wrapup);
 647     __ movl(rcx, 0x0FFF0FF0);
 648     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 649     __ andl(rcx, Address(rsi, 0));
 650     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 651     __ jcc(Assembler::equal, L_wrapup);
 652     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 653     __ jcc(Assembler::equal, L_wrapup);
 654     // vzeroupper() will use a pre-computed instruction sequence that we
 655     // can't compute until after we've determined CPU capabilities. Use
 656     // uncached variant here directly to be able to bootstrap correctly
 657     __ vzeroupper_uncached();
 658 #   undef __
 659   }
 660   address generate_detect_virt() {
 661     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 662 #   define __ _masm->
 663 
 664     address start = __ pc();
 665 
 666     // Evacuate callee-saved registers
 667     __ push(rbp);
 668     __ push(rbx);
 669     __ push(rsi); // for Windows
 670 
 671     __ mov(rax, c_rarg0); // CPUID leaf
 672     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 673 
 674     __ cpuid();
 675 
 676     // Store result to register array
 677     __ movl(Address(rsi,  0), rax);
 678     __ movl(Address(rsi,  4), rbx);
 679     __ movl(Address(rsi,  8), rcx);
 680     __ movl(Address(rsi, 12), rdx);
 681 
 682     // Epilogue
 683     __ pop(rsi);
 684     __ pop(rbx);
 685     __ pop(rbp);
 686     __ ret(0);
 687 
 688 #   undef __
 689 
 690     return start;
 691   };
 692 
 693 
 694   address generate_getCPUIDBrandString(void) {
 695     // Flags to test CPU type.
 696     const uint32_t HS_EFL_AC           = 0x40000;
 697     const uint32_t HS_EFL_ID           = 0x200000;
 698     // Values for when we don't have a CPUID instruction.
 699     const int      CPU_FAMILY_SHIFT = 8;
 700     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 701     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 702 
 703     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 704 
 705     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 706 #   define __ _masm->
 707 
 708     address start = __ pc();
 709 
 710     //
 711     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 712     //
 713     // rcx and rdx are first and second argument registers on windows
 714 
 715     __ push(rbp);
 716     __ mov(rbp, c_rarg0); // cpuid_info address
 717     __ push(rbx);
 718     __ push(rsi);
 719     __ pushf();          // preserve rbx, and flags
 720     __ pop(rax);
 721     __ push(rax);
 722     __ mov(rcx, rax);
 723     //
 724     // if we are unable to change the AC flag, we have a 386
 725     //
 726     __ xorl(rax, HS_EFL_AC);
 727     __ push(rax);
 728     __ popf();
 729     __ pushf();
 730     __ pop(rax);
 731     __ cmpptr(rax, rcx);
 732     __ jccb(Assembler::notEqual, detect_486);
 733 
 734     __ movl(rax, CPU_FAMILY_386);
 735     __ jmp(done);
 736 
 737     //
 738     // If we are unable to change the ID flag, we have a 486 which does
 739     // not support the "cpuid" instruction.
 740     //
 741     __ bind(detect_486);
 742     __ mov(rax, rcx);
 743     __ xorl(rax, HS_EFL_ID);
 744     __ push(rax);
 745     __ popf();
 746     __ pushf();
 747     __ pop(rax);
 748     __ cmpptr(rcx, rax);
 749     __ jccb(Assembler::notEqual, detect_586);
 750 
 751     __ bind(cpu486);
 752     __ movl(rax, CPU_FAMILY_486);
 753     __ jmp(done);
 754 
 755     //
 756     // At this point, we have a chip which supports the "cpuid" instruction
 757     //
 758     __ bind(detect_586);
 759     __ xorl(rax, rax);
 760     __ cpuid();
 761     __ orl(rax, rax);
 762     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 763                                         // value of at least 1, we give up and
 764                                         // assume a 486
 765 
 766     //
 767     // Extended cpuid(0x80000000) for processor brand string detection
 768     //
 769     __ bind(ext_cpuid);
 770     __ movl(rax, CPUID_EXTENDED_FN);
 771     __ cpuid();
 772     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 773     __ jcc(Assembler::below, done);
 774 
 775     //
 776     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 777     //
 778     __ movl(rax, CPUID_EXTENDED_FN_2);
 779     __ cpuid();
 780     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 781     __ movl(Address(rsi, 0), rax);
 782     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 783     __ movl(Address(rsi, 0), rbx);
 784     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 785     __ movl(Address(rsi, 0), rcx);
 786     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 787     __ movl(Address(rsi,0), rdx);
 788 
 789     //
 790     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 791     //
 792     __ movl(rax, CPUID_EXTENDED_FN_3);
 793     __ cpuid();
 794     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 795     __ movl(Address(rsi, 0), rax);
 796     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 797     __ movl(Address(rsi, 0), rbx);
 798     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 799     __ movl(Address(rsi, 0), rcx);
 800     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 801     __ movl(Address(rsi,0), rdx);
 802 
 803     //
 804     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 805     //
 806     __ movl(rax, CPUID_EXTENDED_FN_4);
 807     __ cpuid();
 808     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 809     __ movl(Address(rsi, 0), rax);
 810     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 811     __ movl(Address(rsi, 0), rbx);
 812     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 813     __ movl(Address(rsi, 0), rcx);
 814     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 815     __ movl(Address(rsi,0), rdx);
 816 
 817     //
 818     // return
 819     //
 820     __ bind(done);
 821     __ popf();
 822     __ pop(rsi);
 823     __ pop(rbx);
 824     __ pop(rbp);
 825     __ ret(0);
 826 
 827 #   undef __
 828 
 829     return start;
 830   };
 831 };
 832 
 833 void VM_Version::get_processor_features() {
 834 
 835   _cpu = 4; // 486 by default
 836   _model = 0;
 837   _stepping = 0;
 838   _features = 0;
 839   _logical_processors_per_package = 1;
 840   // i486 internal cache is both I&D and has a 16-byte line size
 841   _L1_data_cache_line_size = 16;
 842 
 843   // Get raw processor info
 844 
 845   get_cpu_info_stub(&_cpuid_info);
 846 
 847   assert_is_initialized();
 848   _cpu = extended_cpu_family();
 849   _model = extended_cpu_model();
 850   _stepping = cpu_stepping();
 851 
 852   if (cpu_family() > 4) { // it supports CPUID
 853     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 854     _cpu_features = _features;   // Preserve features
 855     // Logical processors are only available on P4s and above,
 856     // and only if hyperthreading is available.
 857     _logical_processors_per_package = logical_processor_count();
 858     _L1_data_cache_line_size = L1_line_size();
 859   }
 860 
 861   // xchg and xadd instructions
 862   _supports_atomic_getset4 = true;
 863   _supports_atomic_getadd4 = true;
 864   _supports_atomic_getset8 = true;
 865   _supports_atomic_getadd8 = true;
 866 
 867   // OS should support SSE for x64 and hardware should support at least SSE2.
 868   if (!VM_Version::supports_sse2()) {
 869     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 870   }
 871   // in 64 bit the use of SSE2 is the minimum
 872   if (UseSSE < 2) UseSSE = 2;
 873 
 874   // flush_icache_stub have to be generated first.
 875   // That is why Icache line size is hard coded in ICache class,
 876   // see icache_x86.hpp. It is also the reason why we can't use
 877   // clflush instruction in 32-bit VM since it could be running
 878   // on CPU which does not support it.
 879   //
 880   // The only thing we can do is to verify that flushed
 881   // ICache::line_size has correct value.
 882   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 883   // clflush_size is size in quadwords (8 bytes).
 884   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 885 
 886   // assigning this field effectively enables Unsafe.writebackMemory()
 887   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 888   // that is only implemented on x86_64 and only if the OS plays ball
 889   if (os::supports_map_sync()) {
 890     // publish data cache line flush size to generic field, otherwise
 891     // let if default to zero thereby disabling writeback
 892     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 893   }
 894 
 895   // Check if processor has Intel Ecore
 896   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 897     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 898       _model == 0xCC || _model == 0xDD)) {
 899     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 900   }
 901 
 902   if (UseSSE < 4) {
 903     _features &= ~CPU_SSE4_1;
 904     _features &= ~CPU_SSE4_2;
 905   }
 906 
 907   if (UseSSE < 3) {
 908     _features &= ~CPU_SSE3;
 909     _features &= ~CPU_SSSE3;
 910     _features &= ~CPU_SSE4A;
 911   }
 912 
 913   if (UseSSE < 2)
 914     _features &= ~CPU_SSE2;
 915 
 916   if (UseSSE < 1)
 917     _features &= ~CPU_SSE;
 918 
 919   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 920   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 921     UseAVX = 0;
 922   }
 923 
 924   // UseSSE is set to the smaller of what hardware supports and what
 925   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 926   // older Pentiums which do not support it.
 927   int use_sse_limit = 0;
 928   if (UseSSE > 0) {
 929     if (UseSSE > 3 && supports_sse4_1()) {
 930       use_sse_limit = 4;
 931     } else if (UseSSE > 2 && supports_sse3()) {
 932       use_sse_limit = 3;
 933     } else if (UseSSE > 1 && supports_sse2()) {
 934       use_sse_limit = 2;
 935     } else if (UseSSE > 0 && supports_sse()) {
 936       use_sse_limit = 1;
 937     } else {
 938       use_sse_limit = 0;
 939     }
 940   }
 941   if (FLAG_IS_DEFAULT(UseSSE)) {
 942     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 943   } else if (UseSSE > use_sse_limit) {
 944     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 945     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 946   }
 947 
 948   // first try initial setting and detect what we can support
 949   int use_avx_limit = 0;
 950   if (UseAVX > 0) {
 951     if (UseSSE < 4) {
 952       // Don't use AVX if SSE is unavailable or has been disabled.
 953       use_avx_limit = 0;
 954     } else if (UseAVX > 2 && supports_evex()) {
 955       use_avx_limit = 3;
 956     } else if (UseAVX > 1 && supports_avx2()) {
 957       use_avx_limit = 2;
 958     } else if (UseAVX > 0 && supports_avx()) {
 959       use_avx_limit = 1;
 960     } else {
 961       use_avx_limit = 0;
 962     }
 963   }
 964   if (FLAG_IS_DEFAULT(UseAVX)) {
 965     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 966     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 967       FLAG_SET_DEFAULT(UseAVX, 2);
 968     } else {
 969       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 970     }
 971   }
 972 
 973   if (UseAVX > use_avx_limit) {
 974     if (UseSSE < 4) {
 975       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 976     } else {
 977       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 978     }
 979     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 980   }
 981 
 982   if (UseAVX < 3) {
 983     _features &= ~CPU_AVX512F;
 984     _features &= ~CPU_AVX512DQ;
 985     _features &= ~CPU_AVX512CD;
 986     _features &= ~CPU_AVX512BW;
 987     _features &= ~CPU_AVX512VL;
 988     _features &= ~CPU_AVX512_VPOPCNTDQ;
 989     _features &= ~CPU_AVX512_VPCLMULQDQ;
 990     _features &= ~CPU_AVX512_VAES;
 991     _features &= ~CPU_AVX512_VNNI;
 992     _features &= ~CPU_AVX512_VBMI;
 993     _features &= ~CPU_AVX512_VBMI2;
 994     _features &= ~CPU_AVX512_BITALG;
 995     _features &= ~CPU_AVX512_IFMA;
 996     _features &= ~CPU_APX_F;
 997     _features &= ~CPU_AVX512_FP16;
 998   }
 999 
1000   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1001   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1002   if (UseAPX && !apx_supported) {
1003     warning("UseAPX is not supported on this CPU, setting it to false");
1004     FLAG_SET_DEFAULT(UseAPX, false);
1005   } else if (FLAG_IS_DEFAULT(UseAPX)) {
1006     FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
1007   }
1008 
1009   if (!UseAPX) {
1010     _features &= ~CPU_APX_F;
1011   }
1012 
1013   if (UseAVX < 2) {
1014     _features &= ~CPU_AVX2;
1015     _features &= ~CPU_AVX_IFMA;
1016   }
1017 
1018   if (UseAVX < 1) {
1019     _features &= ~CPU_AVX;
1020     _features &= ~CPU_VZEROUPPER;
1021     _features &= ~CPU_F16C;
1022     _features &= ~CPU_SHA512;
1023   }
1024 
1025   if (logical_processors_per_package() == 1) {
1026     // HT processor could be installed on a system which doesn't support HT.
1027     _features &= ~CPU_HT;
1028   }
1029 
1030   if (is_intel()) { // Intel cpus specific settings
1031     if (is_knights_family()) {
1032       _features &= ~CPU_VZEROUPPER;
1033       _features &= ~CPU_AVX512BW;
1034       _features &= ~CPU_AVX512VL;
1035       _features &= ~CPU_AVX512DQ;
1036       _features &= ~CPU_AVX512_VNNI;
1037       _features &= ~CPU_AVX512_VAES;
1038       _features &= ~CPU_AVX512_VPOPCNTDQ;
1039       _features &= ~CPU_AVX512_VPCLMULQDQ;
1040       _features &= ~CPU_AVX512_VBMI;
1041       _features &= ~CPU_AVX512_VBMI2;
1042       _features &= ~CPU_CLWB;
1043       _features &= ~CPU_FLUSHOPT;
1044       _features &= ~CPU_GFNI;
1045       _features &= ~CPU_AVX512_BITALG;
1046       _features &= ~CPU_AVX512_IFMA;
1047       _features &= ~CPU_AVX_IFMA;
1048       _features &= ~CPU_AVX512_FP16;
1049     }
1050   }
1051 
1052   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1053     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1054   } else {
1055     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1056   }
1057 
1058   assert(supports_cpuid(), "Always present");
1059   assert(supports_clflush(), "Always present");
1060   if (X86ICacheSync == -1) {
1061     // Auto-detect, choosing the best performant one that still flushes
1062     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1063     if (supports_clwb()) {
1064       FLAG_SET_ERGO(X86ICacheSync, 3);
1065     } else if (supports_clflushopt()) {
1066       FLAG_SET_ERGO(X86ICacheSync, 2);
1067     } else {
1068       FLAG_SET_ERGO(X86ICacheSync, 1);
1069     }
1070   } else {
1071     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1072       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1073     }
1074     if ((X86ICacheSync == 3) && !supports_clwb()) {
1075       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1076     }
1077     if ((X86ICacheSync == 5) && !supports_serialize()) {
1078       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1079     }
1080   }
1081 
1082   char buf[1024];
1083   int cpu_info_size = jio_snprintf(
1084               buf, sizeof(buf),
1085               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1086               cores_per_cpu(), threads_per_core(),
1087               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1088   assert(cpu_info_size > 0, "not enough temporary space allocated");
1089   insert_features_names(buf + cpu_info_size, sizeof(buf) - cpu_info_size, _features_names);
1090 
1091   _cpu_info_string = os::strdup(buf);
1092 
1093   _features_string = extract_features_string(_cpu_info_string,
1094                                              strnlen(_cpu_info_string, sizeof(buf)),
1095                                              cpu_info_size);
1096 
1097   // Use AES instructions if available.
1098   if (supports_aes()) {
1099     if (FLAG_IS_DEFAULT(UseAES)) {
1100       FLAG_SET_DEFAULT(UseAES, true);
1101     }
1102     if (!UseAES) {
1103       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1104         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1105       }
1106       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1107     } else {
1108       if (UseSSE > 2) {
1109         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1110           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1111         }
1112       } else {
1113         // The AES intrinsic stubs require AES instruction support (of course)
1114         // but also require sse3 mode or higher for instructions it use.
1115         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1116           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1117         }
1118         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1119       }
1120 
1121       // --AES-CTR begins--
1122       if (!UseAESIntrinsics) {
1123         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1124           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1125           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1126         }
1127       } else {
1128         if (supports_sse4_1()) {
1129           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1130             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1131           }
1132         } else {
1133            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1134            // but also require sse4.1 mode or higher for instructions it use.
1135           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1136              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1137            }
1138            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1139         }
1140       }
1141       // --AES-CTR ends--
1142     }
1143   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1144     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1145       warning("AES instructions are not available on this CPU");
1146       FLAG_SET_DEFAULT(UseAES, false);
1147     }
1148     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1149       warning("AES intrinsics are not available on this CPU");
1150       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1151     }
1152     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1153       warning("AES-CTR intrinsics are not available on this CPU");
1154       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1155     }
1156   }
1157 
1158   // Use CLMUL instructions if available.
1159   if (supports_clmul()) {
1160     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1161       UseCLMUL = true;
1162     }
1163   } else if (UseCLMUL) {
1164     if (!FLAG_IS_DEFAULT(UseCLMUL))
1165       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1166     FLAG_SET_DEFAULT(UseCLMUL, false);
1167   }
1168 
1169   if (UseCLMUL && (UseSSE > 2)) {
1170     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1171       UseCRC32Intrinsics = true;
1172     }
1173   } else if (UseCRC32Intrinsics) {
1174     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1175       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1176     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1177   }
1178 
1179   if (supports_avx2()) {
1180     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1181       UseAdler32Intrinsics = true;
1182     }
1183   } else if (UseAdler32Intrinsics) {
1184     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1185       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1186     }
1187     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1188   }
1189 
1190   if (supports_sse4_2() && supports_clmul()) {
1191     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1192       UseCRC32CIntrinsics = true;
1193     }
1194   } else if (UseCRC32CIntrinsics) {
1195     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1196       warning("CRC32C intrinsics are not available on this CPU");
1197     }
1198     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1199   }
1200 
1201   // GHASH/GCM intrinsics
1202   if (UseCLMUL && (UseSSE > 2)) {
1203     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1204       UseGHASHIntrinsics = true;
1205     }
1206   } else if (UseGHASHIntrinsics) {
1207     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1208       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1209     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1210   }
1211 
1212   // ChaCha20 Intrinsics
1213   // As long as the system supports AVX as a baseline we can do a
1214   // SIMD-enabled block function.  StubGenerator makes the determination
1215   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1216   // version.
1217   if (UseAVX >= 1) {
1218       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1219           UseChaCha20Intrinsics = true;
1220       }
1221   } else if (UseChaCha20Intrinsics) {
1222       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1223           warning("ChaCha20 intrinsic requires AVX instructions");
1224       }
1225       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1226   }
1227 
1228   // Dilithium Intrinsics
1229   // Currently we only have them for AVX512
1230   if (supports_evex() && supports_avx512bw()) {
1231       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1232           UseDilithiumIntrinsics = true;
1233       }
1234   } else if (UseDilithiumIntrinsics) {
1235       warning("Intrinsics for ML-DSA are not available on this CPU.");
1236       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1237   }
1238 
1239   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1240   if (UseAVX >= 2) {
1241     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1242       UseBASE64Intrinsics = true;
1243     }
1244   } else if (UseBASE64Intrinsics) {
1245      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1246       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1247     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1248   }
1249 
1250   if (supports_fma()) {
1251     if (FLAG_IS_DEFAULT(UseFMA)) {
1252       UseFMA = true;
1253     }
1254   } else if (UseFMA) {
1255     warning("FMA instructions are not available on this CPU");
1256     FLAG_SET_DEFAULT(UseFMA, false);
1257   }
1258 
1259   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1260     UseMD5Intrinsics = true;
1261   }
1262 
1263   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1264     if (FLAG_IS_DEFAULT(UseSHA)) {
1265       UseSHA = true;
1266     }
1267   } else if (UseSHA) {
1268     warning("SHA instructions are not available on this CPU");
1269     FLAG_SET_DEFAULT(UseSHA, false);
1270   }
1271 
1272   if (supports_sha() && supports_sse4_1() && UseSHA) {
1273     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1274       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1275     }
1276   } else if (UseSHA1Intrinsics) {
1277     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1278     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1279   }
1280 
1281   if (supports_sse4_1() && UseSHA) {
1282     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1283       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1284     }
1285   } else if (UseSHA256Intrinsics) {
1286     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1287     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1288   }
1289 
1290   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1291     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1292       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1293     }
1294   } else if (UseSHA512Intrinsics) {
1295     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1296     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1297   }
1298 
1299   if (supports_evex() && supports_avx512bw()) {
1300       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1301           UseSHA3Intrinsics = true;
1302       }
1303   } else if (UseSHA3Intrinsics) {
1304       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1305       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1306   }
1307 
1308   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1309     FLAG_SET_DEFAULT(UseSHA, false);
1310   }
1311 
1312 #if COMPILER2_OR_JVMCI
1313   int max_vector_size = 0;
1314   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1315     // 16 byte vectors (in XMM) are supported with SSE2+
1316     max_vector_size = 16;
1317   } else if (UseAVX == 1 || UseAVX == 2) {
1318     // 32 bytes vectors (in YMM) are only supported with AVX+
1319     max_vector_size = 32;
1320   } else if (UseAVX > 2) {
1321     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1322     max_vector_size = 64;
1323   }
1324 
1325   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1326 
1327   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1328     if (MaxVectorSize < min_vector_size) {
1329       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1330       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1331     }
1332     if (MaxVectorSize > max_vector_size) {
1333       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1334       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1335     }
1336     if (!is_power_of_2(MaxVectorSize)) {
1337       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1338       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1339     }
1340   } else {
1341     // If default, use highest supported configuration
1342     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1343   }
1344 
1345 #if defined(COMPILER2) && defined(ASSERT)
1346   if (MaxVectorSize > 0) {
1347     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1348       tty->print_cr("State of YMM registers after signal handle:");
1349       int nreg = 4;
1350       const char* ymm_name[4] = {"0", "7", "8", "15"};
1351       for (int i = 0; i < nreg; i++) {
1352         tty->print("YMM%s:", ymm_name[i]);
1353         for (int j = 7; j >=0; j--) {
1354           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1355         }
1356         tty->cr();
1357       }
1358     }
1359   }
1360 #endif // COMPILER2 && ASSERT
1361 
1362   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1363     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1364       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1365     }
1366   } else if (UsePoly1305Intrinsics) {
1367     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1368     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1369   }
1370 
1371   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1372     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1373       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1374     }
1375   } else if (UseIntPolyIntrinsics) {
1376     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1377     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1378   }
1379 
1380   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1381     UseMultiplyToLenIntrinsic = true;
1382   }
1383   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1384     UseSquareToLenIntrinsic = true;
1385   }
1386   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1387     UseMulAddIntrinsic = true;
1388   }
1389   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1390     UseMontgomeryMultiplyIntrinsic = true;
1391   }
1392   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1393     UseMontgomerySquareIntrinsic = true;
1394   }
1395 #endif // COMPILER2_OR_JVMCI
1396 
1397   // On new cpus instructions which update whole XMM register should be used
1398   // to prevent partial register stall due to dependencies on high half.
1399   //
1400   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1401   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1402   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1403   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1404 
1405 
1406   if (is_zx()) { // ZX cpus specific settings
1407     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1408       UseStoreImmI16 = false; // don't use it on ZX cpus
1409     }
1410     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1411       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1412         // Use it on all ZX cpus
1413         UseAddressNop = true;
1414       }
1415     }
1416     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1417       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1418     }
1419     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1420       if (supports_sse3()) {
1421         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1422       } else {
1423         UseXmmRegToRegMoveAll = false;
1424       }
1425     }
1426     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1427 #ifdef COMPILER2
1428       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1429         // For new ZX cpus do the next optimization:
1430         // don't align the beginning of a loop if there are enough instructions
1431         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1432         // in current fetch line (OptoLoopAlignment) or the padding
1433         // is big (> MaxLoopPad).
1434         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1435         // generated NOP instructions. 11 is the largest size of one
1436         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1437         MaxLoopPad = 11;
1438       }
1439 #endif // COMPILER2
1440       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1441         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1442       }
1443       if (supports_sse4_2()) { // new ZX cpus
1444         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1445           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1446         }
1447       }
1448       if (supports_sse4_2()) {
1449         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1450           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1451         }
1452       } else {
1453         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1454           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1455         }
1456         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1457       }
1458     }
1459 
1460     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1461       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1462     }
1463   }
1464 
1465   if (is_amd_family()) { // AMD cpus specific settings
1466     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1467       // Use it on new AMD cpus starting from Opteron.
1468       UseAddressNop = true;
1469     }
1470     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1471       // Use it on new AMD cpus starting from Opteron.
1472       UseNewLongLShift = true;
1473     }
1474     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1475       if (supports_sse4a()) {
1476         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1477       } else {
1478         UseXmmLoadAndClearUpper = false;
1479       }
1480     }
1481     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1482       if (supports_sse4a()) {
1483         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1484       } else {
1485         UseXmmRegToRegMoveAll = false;
1486       }
1487     }
1488     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1489       if (supports_sse4a()) {
1490         UseXmmI2F = true;
1491       } else {
1492         UseXmmI2F = false;
1493       }
1494     }
1495     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1496       if (supports_sse4a()) {
1497         UseXmmI2D = true;
1498       } else {
1499         UseXmmI2D = false;
1500       }
1501     }
1502     if (supports_sse4_2()) {
1503       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1504         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1505       }
1506     } else {
1507       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1508         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1509       }
1510       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1511     }
1512 
1513     // some defaults for AMD family 15h
1514     if (cpu_family() == 0x15) {
1515       // On family 15h processors default is no sw prefetch
1516       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1517         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1518       }
1519       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1520       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1521         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1522       }
1523       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1524       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1525         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1526       }
1527       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1528         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1529       }
1530     }
1531 
1532 #ifdef COMPILER2
1533     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1534       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1535       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1536     }
1537 #endif // COMPILER2
1538 
1539     // Some defaults for AMD family >= 17h && Hygon family 18h
1540     if (cpu_family() >= 0x17) {
1541       // On family >=17h processors use XMM and UnalignedLoadStores
1542       // for Array Copy
1543       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1544         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1545       }
1546       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1547         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1548       }
1549 #ifdef COMPILER2
1550       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1551         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1552       }
1553 #endif
1554     }
1555   }
1556 
1557   if (is_intel()) { // Intel cpus specific settings
1558     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1559       UseStoreImmI16 = false; // don't use it on Intel cpus
1560     }
1561     if (cpu_family() == 6 || cpu_family() == 15) {
1562       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1563         // Use it on all Intel cpus starting from PentiumPro
1564         UseAddressNop = true;
1565       }
1566     }
1567     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1568       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1569     }
1570     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1571       if (supports_sse3()) {
1572         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1573       } else {
1574         UseXmmRegToRegMoveAll = false;
1575       }
1576     }
1577     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1578 #ifdef COMPILER2
1579       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1580         // For new Intel cpus do the next optimization:
1581         // don't align the beginning of a loop if there are enough instructions
1582         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1583         // in current fetch line (OptoLoopAlignment) or the padding
1584         // is big (> MaxLoopPad).
1585         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1586         // generated NOP instructions. 11 is the largest size of one
1587         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1588         MaxLoopPad = 11;
1589       }
1590 #endif // COMPILER2
1591 
1592       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1593         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1594       }
1595       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1596         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1597           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1598         }
1599       }
1600       if (supports_sse4_2()) {
1601         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1602           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1603         }
1604       } else {
1605         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1606           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1607         }
1608         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1609       }
1610     }
1611     if (is_atom_family() || is_knights_family()) {
1612 #ifdef COMPILER2
1613       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1614         OptoScheduling = true;
1615       }
1616 #endif
1617       if (supports_sse4_2()) { // Silvermont
1618         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1619           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1620         }
1621       }
1622       if (FLAG_IS_DEFAULT(UseIncDec)) {
1623         FLAG_SET_DEFAULT(UseIncDec, false);
1624       }
1625     }
1626     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1627       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1628     }
1629 #ifdef COMPILER2
1630     if (UseAVX > 2) {
1631       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1632           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1633            ArrayOperationPartialInlineSize != 0 &&
1634            ArrayOperationPartialInlineSize != 16 &&
1635            ArrayOperationPartialInlineSize != 32 &&
1636            ArrayOperationPartialInlineSize != 64)) {
1637         int inline_size = 0;
1638         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1639           inline_size = 64;
1640         } else if (MaxVectorSize >= 32) {
1641           inline_size = 32;
1642         } else if (MaxVectorSize >= 16) {
1643           inline_size = 16;
1644         }
1645         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1646           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1647         }
1648         ArrayOperationPartialInlineSize = inline_size;
1649       }
1650 
1651       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1652         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1653         if (ArrayOperationPartialInlineSize) {
1654           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1655         } else {
1656           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1657         }
1658       }
1659     }
1660 #endif
1661   }
1662 
1663 #ifdef COMPILER2
1664   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1665     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1666       OptimizeFill = false;
1667     }
1668   }
1669 #endif
1670 
1671   if (UseSSE42Intrinsics) {
1672     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1673       UseVectorizedMismatchIntrinsic = true;
1674     }
1675   } else if (UseVectorizedMismatchIntrinsic) {
1676     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1677       warning("vectorizedMismatch intrinsics are not available on this CPU");
1678     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1679   }
1680   if (UseAVX >= 2) {
1681     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1682   } else if (UseVectorizedHashCodeIntrinsic) {
1683     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1684       warning("vectorizedHashCode intrinsics are not available on this CPU");
1685     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1686   }
1687 
1688   // Use count leading zeros count instruction if available.
1689   if (supports_lzcnt()) {
1690     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1691       UseCountLeadingZerosInstruction = true;
1692     }
1693    } else if (UseCountLeadingZerosInstruction) {
1694     warning("lzcnt instruction is not available on this CPU");
1695     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1696   }
1697 
1698   // Use count trailing zeros instruction if available
1699   if (supports_bmi1()) {
1700     // tzcnt does not require VEX prefix
1701     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1702       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1703         // Don't use tzcnt if BMI1 is switched off on command line.
1704         UseCountTrailingZerosInstruction = false;
1705       } else {
1706         UseCountTrailingZerosInstruction = true;
1707       }
1708     }
1709   } else if (UseCountTrailingZerosInstruction) {
1710     warning("tzcnt instruction is not available on this CPU");
1711     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1712   }
1713 
1714   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1715   // VEX prefix is generated only when AVX > 0.
1716   if (supports_bmi1() && supports_avx()) {
1717     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1718       UseBMI1Instructions = true;
1719     }
1720   } else if (UseBMI1Instructions) {
1721     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1722     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1723   }
1724 
1725   if (supports_bmi2() && supports_avx()) {
1726     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1727       UseBMI2Instructions = true;
1728     }
1729   } else if (UseBMI2Instructions) {
1730     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1731     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1732   }
1733 
1734   // Use population count instruction if available.
1735   if (supports_popcnt()) {
1736     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1737       UsePopCountInstruction = true;
1738     }
1739   } else if (UsePopCountInstruction) {
1740     warning("POPCNT instruction is not available on this CPU");
1741     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1742   }
1743 
1744   // Use fast-string operations if available.
1745   if (supports_erms()) {
1746     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1747       UseFastStosb = true;
1748     }
1749   } else if (UseFastStosb) {
1750     warning("fast-string operations are not available on this CPU");
1751     FLAG_SET_DEFAULT(UseFastStosb, false);
1752   }
1753 
1754   // For AMD Processors use XMM/YMM MOVDQU instructions
1755   // for Object Initialization as default
1756   if (is_amd() && cpu_family() >= 0x19) {
1757     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1758       UseFastStosb = false;
1759     }
1760   }
1761 
1762 #ifdef COMPILER2
1763   if (is_intel() && MaxVectorSize > 16) {
1764     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1765       UseFastStosb = false;
1766     }
1767   }
1768 #endif
1769 
1770   // Use XMM/YMM MOVDQU instruction for Object Initialization
1771   if (UseUnalignedLoadStores) {
1772     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1773       UseXMMForObjInit = true;
1774     }
1775   } else if (UseXMMForObjInit) {
1776     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1777     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1778   }
1779 
1780 #ifdef COMPILER2
1781   if (FLAG_IS_DEFAULT(AlignVector)) {
1782     // Modern processors allow misaligned memory operations for vectors.
1783     AlignVector = !UseUnalignedLoadStores;
1784   }
1785 #endif // COMPILER2
1786 
1787   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1788     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1789       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1790     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1791       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1792     }
1793   }
1794 
1795   // Allocation prefetch settings
1796   int cache_line_size = checked_cast<int>(prefetch_data_size());
1797   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1798       (cache_line_size > AllocatePrefetchStepSize)) {
1799     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1800   }
1801 
1802   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1803     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1804     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1805       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1806     }
1807     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1808   }
1809 
1810   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1811     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1812     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1813   }
1814 
1815   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1816     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1817         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1818       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1819     }
1820 #ifdef COMPILER2
1821     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1822       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1823     }
1824 #endif
1825   }
1826 
1827   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1828 #ifdef COMPILER2
1829     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1830       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1831     }
1832 #endif
1833   }
1834 
1835   // Prefetch settings
1836 
1837   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1838   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1839   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1840   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1841 
1842   // gc copy/scan is disabled if prefetchw isn't supported, because
1843   // Prefetch::write emits an inlined prefetchw on Linux.
1844   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1845   // The used prefetcht0 instruction works for both amd64 and em64t.
1846 
1847   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1848     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1849   }
1850   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1851     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1852   }
1853 
1854   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1855      (cache_line_size > ContendedPaddingWidth))
1856      ContendedPaddingWidth = cache_line_size;
1857 
1858   // This machine allows unaligned memory accesses
1859   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1860     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1861   }
1862 
1863 #ifndef PRODUCT
1864   if (log_is_enabled(Info, os, cpu)) {
1865     LogStream ls(Log(os, cpu)::info());
1866     outputStream* log = &ls;
1867     log->print_cr("Logical CPUs per core: %u",
1868                   logical_processors_per_package());
1869     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1870     log->print("UseSSE=%d", UseSSE);
1871     if (UseAVX > 0) {
1872       log->print("  UseAVX=%d", UseAVX);
1873     }
1874     if (UseAES) {
1875       log->print("  UseAES=1");
1876     }
1877 #ifdef COMPILER2
1878     if (MaxVectorSize > 0) {
1879       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1880     }
1881 #endif
1882     log->cr();
1883     log->print("Allocation");
1884     if (AllocatePrefetchStyle <= 0) {
1885       log->print_cr(": no prefetching");
1886     } else {
1887       log->print(" prefetching: ");
1888       if (AllocatePrefetchInstr == 0) {
1889         log->print("PREFETCHNTA");
1890       } else if (AllocatePrefetchInstr == 1) {
1891         log->print("PREFETCHT0");
1892       } else if (AllocatePrefetchInstr == 2) {
1893         log->print("PREFETCHT2");
1894       } else if (AllocatePrefetchInstr == 3) {
1895         log->print("PREFETCHW");
1896       }
1897       if (AllocatePrefetchLines > 1) {
1898         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1899       } else {
1900         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1901       }
1902     }
1903 
1904     if (PrefetchCopyIntervalInBytes > 0) {
1905       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1906     }
1907     if (PrefetchScanIntervalInBytes > 0) {
1908       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1909     }
1910     if (ContendedPaddingWidth > 0) {
1911       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1912     }
1913   }
1914 #endif // !PRODUCT
1915   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1916       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1917   }
1918   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1919       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1920   }
1921 }
1922 
1923 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1924   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1925   if (vrt == XenHVM) {
1926     st->print_cr("Xen hardware-assisted virtualization detected");
1927   } else if (vrt == KVM) {
1928     st->print_cr("KVM virtualization detected");
1929   } else if (vrt == VMWare) {
1930     st->print_cr("VMWare virtualization detected");
1931     VirtualizationSupport::print_virtualization_info(st);
1932   } else if (vrt == HyperV) {
1933     st->print_cr("Hyper-V virtualization detected");
1934   } else if (vrt == HyperVRole) {
1935     st->print_cr("Hyper-V role detected");
1936   }
1937 }
1938 
1939 bool VM_Version::compute_has_intel_jcc_erratum() {
1940   if (!is_intel_family_core()) {
1941     // Only Intel CPUs are affected.
1942     return false;
1943   }
1944   // The following table of affected CPUs is based on the following document released by Intel:
1945   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1946   switch (_model) {
1947   case 0x8E:
1948     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1949     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1950     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1951     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1952     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1953     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1954     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1955     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1956     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1957     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1958   case 0x4E:
1959     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1960     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1961     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1962     return _stepping == 0x3;
1963   case 0x55:
1964     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1965     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1966     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1967     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1968     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1969     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1970     return _stepping == 0x4 || _stepping == 0x7;
1971   case 0x5E:
1972     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1973     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1974     return _stepping == 0x3;
1975   case 0x9E:
1976     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1977     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1978     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1979     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1980     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1981     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1982     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1983     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1984     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1985     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1986     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1987     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1988     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1989     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1990     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1991   case 0xA5:
1992     // Not in Intel documentation.
1993     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1994     return true;
1995   case 0xA6:
1996     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1997     return _stepping == 0x0;
1998   case 0xAE:
1999     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2000     return _stepping == 0xA;
2001   default:
2002     // If we are running on another intel machine not recognized in the table, we are okay.
2003     return false;
2004   }
2005 }
2006 
2007 // On Xen, the cpuid instruction returns
2008 //  eax / registers[0]: Version of Xen
2009 //  ebx / registers[1]: chars 'XenV'
2010 //  ecx / registers[2]: chars 'MMXe'
2011 //  edx / registers[3]: chars 'nVMM'
2012 //
2013 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2014 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2015 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2016 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2017 //
2018 // more information :
2019 // https://kb.vmware.com/s/article/1009458
2020 //
2021 void VM_Version::check_virtualizations() {
2022   uint32_t registers[4] = {0};
2023   char signature[13] = {0};
2024 
2025   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2026   // from 0x40000000 until 0x40010000.
2027   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2028   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2029     detect_virt_stub(leaf, registers);
2030     memcpy(signature, &registers[1], 12);
2031 
2032     if (strncmp("VMwareVMware", signature, 12) == 0) {
2033       Abstract_VM_Version::_detected_virtualization = VMWare;
2034       // check for extended metrics from guestlib
2035       VirtualizationSupport::initialize();
2036     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2037       Abstract_VM_Version::_detected_virtualization = HyperV;
2038 #ifdef _WINDOWS
2039       // CPUID leaf 0x40000007 is available to the root partition only.
2040       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2041       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2042       detect_virt_stub(0x40000007, registers);
2043       if ((registers[0] != 0x0) ||
2044           (registers[1] != 0x0) ||
2045           (registers[2] != 0x0) ||
2046           (registers[3] != 0x0)) {
2047         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2048       }
2049 #endif
2050     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2051       Abstract_VM_Version::_detected_virtualization = KVM;
2052     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2053       Abstract_VM_Version::_detected_virtualization = XenHVM;
2054     }
2055   }
2056 }
2057 
2058 #ifdef COMPILER2
2059 // Determine if it's running on Cascade Lake using default options.
2060 bool VM_Version::is_default_intel_cascade_lake() {
2061   return FLAG_IS_DEFAULT(UseAVX) &&
2062          FLAG_IS_DEFAULT(MaxVectorSize) &&
2063          UseAVX > 2 &&
2064          is_intel_cascade_lake();
2065 }
2066 #endif
2067 
2068 bool VM_Version::is_intel_cascade_lake() {
2069   return is_intel_skylake() && _stepping >= 5;
2070 }
2071 
2072 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2073 // for implementing the array copy and clear operations.
2074 // The Intel platforms that supports the serialize instruction
2075 // has improved implementation of 64-byte load/stores and so the default
2076 // threshold is set to 0 for these platforms.
2077 int VM_Version::avx3_threshold() {
2078   return (is_intel_family_core() &&
2079           supports_serialize() &&
2080           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2081 }
2082 
2083 void VM_Version::clear_apx_test_state() {
2084   clear_apx_test_state_stub();
2085 }
2086 
2087 static bool _vm_version_initialized = false;
2088 
2089 void VM_Version::initialize() {
2090   ResourceMark rm;
2091   // Making this stub must be FIRST use of assembler
2092   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2093   if (stub_blob == nullptr) {
2094     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2095   }
2096   CodeBuffer c(stub_blob);
2097   VM_Version_StubGenerator g(&c);
2098 
2099   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2100                                      g.generate_get_cpu_info());
2101   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2102                                      g.generate_detect_virt());
2103   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2104                                      g.clear_apx_test_state());
2105   get_processor_features();
2106 
2107   Assembler::precompute_instructions();
2108 
2109   if (VM_Version::supports_hv()) { // Supports hypervisor
2110     check_virtualizations();
2111   }
2112   _vm_version_initialized = true;
2113 }
2114 
2115 typedef enum {
2116    CPU_FAMILY_8086_8088  = 0,
2117    CPU_FAMILY_INTEL_286  = 2,
2118    CPU_FAMILY_INTEL_386  = 3,
2119    CPU_FAMILY_INTEL_486  = 4,
2120    CPU_FAMILY_PENTIUM    = 5,
2121    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2122    CPU_FAMILY_PENTIUM_4  = 0xF
2123 } FamilyFlag;
2124 
2125 typedef enum {
2126   RDTSCP_FLAG  = 0x08000000, // bit 27
2127   INTEL64_FLAG = 0x20000000  // bit 29
2128 } _featureExtendedEdxFlag;
2129 
2130 typedef enum {
2131    FPU_FLAG     = 0x00000001,
2132    VME_FLAG     = 0x00000002,
2133    DE_FLAG      = 0x00000004,
2134    PSE_FLAG     = 0x00000008,
2135    TSC_FLAG     = 0x00000010,
2136    MSR_FLAG     = 0x00000020,
2137    PAE_FLAG     = 0x00000040,
2138    MCE_FLAG     = 0x00000080,
2139    CX8_FLAG     = 0x00000100,
2140    APIC_FLAG    = 0x00000200,
2141    SEP_FLAG     = 0x00000800,
2142    MTRR_FLAG    = 0x00001000,
2143    PGE_FLAG     = 0x00002000,
2144    MCA_FLAG     = 0x00004000,
2145    CMOV_FLAG    = 0x00008000,
2146    PAT_FLAG     = 0x00010000,
2147    PSE36_FLAG   = 0x00020000,
2148    PSNUM_FLAG   = 0x00040000,
2149    CLFLUSH_FLAG = 0x00080000,
2150    DTS_FLAG     = 0x00200000,
2151    ACPI_FLAG    = 0x00400000,
2152    MMX_FLAG     = 0x00800000,
2153    FXSR_FLAG    = 0x01000000,
2154    SSE_FLAG     = 0x02000000,
2155    SSE2_FLAG    = 0x04000000,
2156    SS_FLAG      = 0x08000000,
2157    HTT_FLAG     = 0x10000000,
2158    TM_FLAG      = 0x20000000
2159 } FeatureEdxFlag;
2160 
2161 static BufferBlob* cpuid_brand_string_stub_blob;
2162 static const int   cpuid_brand_string_stub_size = 550;
2163 
2164 extern "C" {
2165   typedef void (*getCPUIDBrandString_stub_t)(void*);
2166 }
2167 
2168 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2169 
2170 // VM_Version statics
2171 enum {
2172   ExtendedFamilyIdLength_INTEL = 16,
2173   ExtendedFamilyIdLength_AMD   = 24
2174 };
2175 
2176 const size_t VENDOR_LENGTH = 13;
2177 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2178 static char* _cpu_brand_string = nullptr;
2179 static int64_t _max_qualified_cpu_frequency = 0;
2180 
2181 static int _no_of_threads = 0;
2182 static int _no_of_cores = 0;
2183 
2184 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2185   "8086/8088",
2186   "",
2187   "286",
2188   "386",
2189   "486",
2190   "Pentium",
2191   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2192   "",
2193   "",
2194   "",
2195   "",
2196   "",
2197   "",
2198   "",
2199   "",
2200   "Pentium 4"
2201 };
2202 
2203 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2204   "",
2205   "",
2206   "",
2207   "",
2208   "5x86",
2209   "K5/K6",
2210   "Athlon/AthlonXP",
2211   "",
2212   "",
2213   "",
2214   "",
2215   "",
2216   "",
2217   "",
2218   "",
2219   "Opteron/Athlon64",
2220   "Opteron QC/Phenom",  // Barcelona et.al.
2221   "",
2222   "",
2223   "",
2224   "",
2225   "",
2226   "",
2227   "Zen"
2228 };
2229 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2230 // September 2013, Vol 3C Table 35-1
2231 const char* const _model_id_pentium_pro[] = {
2232   "",
2233   "Pentium Pro",
2234   "",
2235   "Pentium II model 3",
2236   "",
2237   "Pentium II model 5/Xeon/Celeron",
2238   "Celeron",
2239   "Pentium III/Pentium III Xeon",
2240   "Pentium III/Pentium III Xeon",
2241   "Pentium M model 9",    // Yonah
2242   "Pentium III, model A",
2243   "Pentium III, model B",
2244   "",
2245   "Pentium M model D",    // Dothan
2246   "",
2247   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2248   "",
2249   "",
2250   "",
2251   "",
2252   "",
2253   "",
2254   "Celeron",              // 0x16 Celeron 65nm
2255   "Core 2",               // 0x17 Penryn / Harpertown
2256   "",
2257   "",
2258   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2259   "Atom",                 // 0x1B Z5xx series Silverthorn
2260   "",
2261   "Core 2",               // 0x1D Dunnington (6-core)
2262   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2263   "",
2264   "",
2265   "",
2266   "",
2267   "",
2268   "",
2269   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2270   "",
2271   "",
2272   "",                     // 0x28
2273   "",
2274   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2275   "",
2276   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2277   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2278   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2279   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2280   "",
2281   "",
2282   "",
2283   "",
2284   "",
2285   "",
2286   "",
2287   "",
2288   "",
2289   "",
2290   "Ivy Bridge",           // 0x3a
2291   "",
2292   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2293   "",                     // 0x3d "Next Generation Intel Core Processor"
2294   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2295   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2296   "",
2297   "",
2298   "",
2299   "",
2300   "",
2301   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2302   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2303   nullptr
2304 };
2305 
2306 /* Brand ID is for back compatibility
2307  * Newer CPUs uses the extended brand string */
2308 const char* const _brand_id[] = {
2309   "",
2310   "Celeron processor",
2311   "Pentium III processor",
2312   "Intel Pentium III Xeon processor",
2313   "",
2314   "",
2315   "",
2316   "",
2317   "Intel Pentium 4 processor",
2318   nullptr
2319 };
2320 
2321 
2322 const char* const _feature_edx_id[] = {
2323   "On-Chip FPU",
2324   "Virtual Mode Extensions",
2325   "Debugging Extensions",
2326   "Page Size Extensions",
2327   "Time Stamp Counter",
2328   "Model Specific Registers",
2329   "Physical Address Extension",
2330   "Machine Check Exceptions",
2331   "CMPXCHG8B Instruction",
2332   "On-Chip APIC",
2333   "",
2334   "Fast System Call",
2335   "Memory Type Range Registers",
2336   "Page Global Enable",
2337   "Machine Check Architecture",
2338   "Conditional Mov Instruction",
2339   "Page Attribute Table",
2340   "36-bit Page Size Extension",
2341   "Processor Serial Number",
2342   "CLFLUSH Instruction",
2343   "",
2344   "Debug Trace Store feature",
2345   "ACPI registers in MSR space",
2346   "Intel Architecture MMX Technology",
2347   "Fast Float Point Save and Restore",
2348   "Streaming SIMD extensions",
2349   "Streaming SIMD extensions 2",
2350   "Self-Snoop",
2351   "Hyper Threading",
2352   "Thermal Monitor",
2353   "",
2354   "Pending Break Enable"
2355 };
2356 
2357 const char* const _feature_extended_edx_id[] = {
2358   "",
2359   "",
2360   "",
2361   "",
2362   "",
2363   "",
2364   "",
2365   "",
2366   "",
2367   "",
2368   "",
2369   "SYSCALL/SYSRET",
2370   "",
2371   "",
2372   "",
2373   "",
2374   "",
2375   "",
2376   "",
2377   "",
2378   "Execute Disable Bit",
2379   "",
2380   "",
2381   "",
2382   "",
2383   "",
2384   "",
2385   "RDTSCP",
2386   "",
2387   "Intel 64 Architecture",
2388   "",
2389   ""
2390 };
2391 
2392 const char* const _feature_ecx_id[] = {
2393   "Streaming SIMD Extensions 3",
2394   "PCLMULQDQ",
2395   "64-bit DS Area",
2396   "MONITOR/MWAIT instructions",
2397   "CPL Qualified Debug Store",
2398   "Virtual Machine Extensions",
2399   "Safer Mode Extensions",
2400   "Enhanced Intel SpeedStep technology",
2401   "Thermal Monitor 2",
2402   "Supplemental Streaming SIMD Extensions 3",
2403   "L1 Context ID",
2404   "",
2405   "Fused Multiply-Add",
2406   "CMPXCHG16B",
2407   "xTPR Update Control",
2408   "Perfmon and Debug Capability",
2409   "",
2410   "Process-context identifiers",
2411   "Direct Cache Access",
2412   "Streaming SIMD extensions 4.1",
2413   "Streaming SIMD extensions 4.2",
2414   "x2APIC",
2415   "MOVBE",
2416   "Popcount instruction",
2417   "TSC-Deadline",
2418   "AESNI",
2419   "XSAVE",
2420   "OSXSAVE",
2421   "AVX",
2422   "F16C",
2423   "RDRAND",
2424   ""
2425 };
2426 
2427 const char* const _feature_extended_ecx_id[] = {
2428   "LAHF/SAHF instruction support",
2429   "Core multi-processor legacy mode",
2430   "",
2431   "",
2432   "",
2433   "Advanced Bit Manipulations: LZCNT",
2434   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2435   "Misaligned SSE mode",
2436   "",
2437   "",
2438   "",
2439   "",
2440   "",
2441   "",
2442   "",
2443   "",
2444   "",
2445   "",
2446   "",
2447   "",
2448   "",
2449   "",
2450   "",
2451   "",
2452   "",
2453   "",
2454   "",
2455   "",
2456   "",
2457   "",
2458   "",
2459   ""
2460 };
2461 
2462 void VM_Version::initialize_tsc(void) {
2463   ResourceMark rm;
2464 
2465   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2466   if (cpuid_brand_string_stub_blob == nullptr) {
2467     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2468   }
2469   CodeBuffer c(cpuid_brand_string_stub_blob);
2470   VM_Version_StubGenerator g(&c);
2471   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2472                                    g.generate_getCPUIDBrandString());
2473 }
2474 
2475 const char* VM_Version::cpu_model_description(void) {
2476   uint32_t cpu_family = extended_cpu_family();
2477   uint32_t cpu_model = extended_cpu_model();
2478   const char* model = nullptr;
2479 
2480   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2481     for (uint32_t i = 0; i <= cpu_model; i++) {
2482       model = _model_id_pentium_pro[i];
2483       if (model == nullptr) {
2484         break;
2485       }
2486     }
2487   }
2488   return model;
2489 }
2490 
2491 const char* VM_Version::cpu_brand_string(void) {
2492   if (_cpu_brand_string == nullptr) {
2493     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2494     if (nullptr == _cpu_brand_string) {
2495       return nullptr;
2496     }
2497     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2498     if (ret_val != OS_OK) {
2499       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2500       _cpu_brand_string = nullptr;
2501     }
2502   }
2503   return _cpu_brand_string;
2504 }
2505 
2506 const char* VM_Version::cpu_brand(void) {
2507   const char*  brand  = nullptr;
2508 
2509   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2510     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2511     brand = _brand_id[0];
2512     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2513       brand = _brand_id[i];
2514     }
2515   }
2516   return brand;
2517 }
2518 
2519 bool VM_Version::cpu_is_em64t(void) {
2520   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2521 }
2522 
2523 bool VM_Version::is_netburst(void) {
2524   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2525 }
2526 
2527 bool VM_Version::supports_tscinv_ext(void) {
2528   if (!supports_tscinv_bit()) {
2529     return false;
2530   }
2531 
2532   if (is_intel()) {
2533     return true;
2534   }
2535 
2536   if (is_amd()) {
2537     return !is_amd_Barcelona();
2538   }
2539 
2540   if (is_hygon()) {
2541     return true;
2542   }
2543 
2544   return false;
2545 }
2546 
2547 void VM_Version::resolve_cpu_information_details(void) {
2548 
2549   // in future we want to base this information on proper cpu
2550   // and cache topology enumeration such as:
2551   // Intel 64 Architecture Processor Topology Enumeration
2552   // which supports system cpu and cache topology enumeration
2553   // either using 2xAPICIDs or initial APICIDs
2554 
2555   // currently only rough cpu information estimates
2556   // which will not necessarily reflect the exact configuration of the system
2557 
2558   // this is the number of logical hardware threads
2559   // visible to the operating system
2560   _no_of_threads = os::processor_count();
2561 
2562   // find out number of threads per cpu package
2563   int threads_per_package = threads_per_core() * cores_per_cpu();
2564 
2565   // use amount of threads visible to the process in order to guess number of sockets
2566   _no_of_sockets = _no_of_threads / threads_per_package;
2567 
2568   // process might only see a subset of the total number of threads
2569   // from a single processor package. Virtualization/resource management for example.
2570   // If so then just write a hard 1 as num of pkgs.
2571   if (0 == _no_of_sockets) {
2572     _no_of_sockets = 1;
2573   }
2574 
2575   // estimate the number of cores
2576   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2577 }
2578 
2579 
2580 const char* VM_Version::cpu_family_description(void) {
2581   int cpu_family_id = extended_cpu_family();
2582   if (is_amd()) {
2583     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2584       return _family_id_amd[cpu_family_id];
2585     }
2586   }
2587   if (is_intel()) {
2588     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2589       return cpu_model_description();
2590     }
2591     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2592       return _family_id_intel[cpu_family_id];
2593     }
2594   }
2595   if (is_hygon()) {
2596     return "Dhyana";
2597   }
2598   return "Unknown x86";
2599 }
2600 
2601 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2602   assert(buf != nullptr, "buffer is null!");
2603   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2604 
2605   const char* cpu_type = nullptr;
2606   const char* x64 = nullptr;
2607 
2608   if (is_intel()) {
2609     cpu_type = "Intel";
2610     x64 = cpu_is_em64t() ? " Intel64" : "";
2611   } else if (is_amd()) {
2612     cpu_type = "AMD";
2613     x64 = cpu_is_em64t() ? " AMD64" : "";
2614   } else if (is_hygon()) {
2615     cpu_type = "Hygon";
2616     x64 = cpu_is_em64t() ? " AMD64" : "";
2617   } else {
2618     cpu_type = "Unknown x86";
2619     x64 = cpu_is_em64t() ? " x86_64" : "";
2620   }
2621 
2622   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2623     cpu_type,
2624     cpu_family_description(),
2625     supports_ht() ? " (HT)" : "",
2626     supports_sse3() ? " SSE3" : "",
2627     supports_ssse3() ? " SSSE3" : "",
2628     supports_sse4_1() ? " SSE4.1" : "",
2629     supports_sse4_2() ? " SSE4.2" : "",
2630     supports_sse4a() ? " SSE4A" : "",
2631     is_netburst() ? " Netburst" : "",
2632     is_intel_family_core() ? " Core" : "",
2633     x64);
2634 
2635   return OS_OK;
2636 }
2637 
2638 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2639   assert(buf != nullptr, "buffer is null!");
2640   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2641   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2642 
2643   // invoke newly generated asm code to fetch CPU Brand String
2644   getCPUIDBrandString_stub(&_cpuid_info);
2645 
2646   // fetch results into buffer
2647   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2648   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2649   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2650   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2651   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2652   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2653   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2654   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2655   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2656   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2657   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2658   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2659 
2660   return OS_OK;
2661 }
2662 
2663 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2664   guarantee(buf != nullptr, "buffer is null!");
2665   guarantee(buf_len > 0, "buffer len not enough!");
2666 
2667   unsigned int flag = 0;
2668   unsigned int fi = 0;
2669   size_t       written = 0;
2670   const char*  prefix = "";
2671 
2672 #define WRITE_TO_BUF(string)                                                          \
2673   {                                                                                   \
2674     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2675     if (res < 0) {                                                                    \
2676       return buf_len - 1;                                                             \
2677     }                                                                                 \
2678     written += res;                                                                   \
2679     if (prefix[0] == '\0') {                                                          \
2680       prefix = ", ";                                                                  \
2681     }                                                                                 \
2682   }
2683 
2684   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2685     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2686       continue; /* no hyperthreading */
2687     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2688       continue; /* no fast system call */
2689     }
2690     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2691       WRITE_TO_BUF(_feature_edx_id[fi]);
2692     }
2693   }
2694 
2695   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2696     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2697       WRITE_TO_BUF(_feature_ecx_id[fi]);
2698     }
2699   }
2700 
2701   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2702     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2703       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2704     }
2705   }
2706 
2707   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2708     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2709       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2710     }
2711   }
2712 
2713   if (supports_tscinv_bit()) {
2714       WRITE_TO_BUF("Invariant TSC");
2715   }
2716 
2717   return written;
2718 }
2719 
2720 /**
2721  * Write a detailed description of the cpu to a given buffer, including
2722  * feature set.
2723  */
2724 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2725   assert(buf != nullptr, "buffer is null!");
2726   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2727 
2728   static const char* unknown = "<unknown>";
2729   char               vendor_id[VENDOR_LENGTH];
2730   const char*        family = nullptr;
2731   const char*        model = nullptr;
2732   const char*        brand = nullptr;
2733   int                outputLen = 0;
2734 
2735   family = cpu_family_description();
2736   if (family == nullptr) {
2737     family = unknown;
2738   }
2739 
2740   model = cpu_model_description();
2741   if (model == nullptr) {
2742     model = unknown;
2743   }
2744 
2745   brand = cpu_brand_string();
2746 
2747   if (brand == nullptr) {
2748     brand = cpu_brand();
2749     if (brand == nullptr) {
2750       brand = unknown;
2751     }
2752   }
2753 
2754   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2755   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2756   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2757   vendor_id[VENDOR_LENGTH-1] = '\0';
2758 
2759   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2760     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2761     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2762     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2763     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2764     "Supports: ",
2765     brand,
2766     vendor_id,
2767     family,
2768     extended_cpu_family(),
2769     model,
2770     extended_cpu_model(),
2771     cpu_stepping(),
2772     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2773     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2774     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2775     _cpuid_info.std_cpuid1_eax.value,
2776     _cpuid_info.std_cpuid1_ebx.value,
2777     _cpuid_info.std_cpuid1_ecx.value,
2778     _cpuid_info.std_cpuid1_edx.value,
2779     _cpuid_info.ext_cpuid1_eax,
2780     _cpuid_info.ext_cpuid1_ebx,
2781     _cpuid_info.ext_cpuid1_ecx,
2782     _cpuid_info.ext_cpuid1_edx);
2783 
2784   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2785     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2786     return OS_ERR;
2787   }
2788 
2789   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2790 
2791   return OS_OK;
2792 }
2793 
2794 
2795 // Fill in Abstract_VM_Version statics
2796 void VM_Version::initialize_cpu_information() {
2797   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2798   assert(!_initialized, "shouldn't be initialized yet");
2799   resolve_cpu_information_details();
2800 
2801   // initialize cpu_name and cpu_desc
2802   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2803   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2804   _initialized = true;
2805 }
2806 
2807 /**
2808  *  For information about extracting the frequency from the cpu brand string, please see:
2809  *
2810  *    Intel Processor Identification and the CPUID Instruction
2811  *    Application Note 485
2812  *    May 2012
2813  *
2814  * The return value is the frequency in Hz.
2815  */
2816 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2817   const char* const brand_string = cpu_brand_string();
2818   if (brand_string == nullptr) {
2819     return 0;
2820   }
2821   const int64_t MEGA = 1000000;
2822   int64_t multiplier = 0;
2823   int64_t frequency = 0;
2824   uint8_t idx = 0;
2825   // The brand string buffer is at most 48 bytes.
2826   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2827   for (; idx < 48-2; ++idx) {
2828     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2829     // Search brand string for "yHz" where y is M, G, or T.
2830     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2831       if (brand_string[idx] == 'M') {
2832         multiplier = MEGA;
2833       } else if (brand_string[idx] == 'G') {
2834         multiplier = MEGA * 1000;
2835       } else if (brand_string[idx] == 'T') {
2836         multiplier = MEGA * MEGA;
2837       }
2838       break;
2839     }
2840   }
2841   if (multiplier > 0) {
2842     // Compute frequency (in Hz) from brand string.
2843     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2844       frequency =  (brand_string[idx-4] - '0') * multiplier;
2845       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2846       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2847     } else { // format is "xxxx"
2848       frequency =  (brand_string[idx-4] - '0') * 1000;
2849       frequency += (brand_string[idx-3] - '0') * 100;
2850       frequency += (brand_string[idx-2] - '0') * 10;
2851       frequency += (brand_string[idx-1] - '0');
2852       frequency *= multiplier;
2853     }
2854   }
2855   return frequency;
2856 }
2857 
2858 
2859 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2860   if (_max_qualified_cpu_frequency == 0) {
2861     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2862   }
2863   return _max_qualified_cpu_frequency;
2864 }
2865 
2866 uint64_t VM_Version::CpuidInfo::feature_flags() const {
2867   uint64_t result = 0;
2868   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2869     result |= CPU_CX8;
2870   if (std_cpuid1_edx.bits.cmov != 0)
2871     result |= CPU_CMOV;
2872   if (std_cpuid1_edx.bits.clflush != 0)
2873     result |= CPU_FLUSH;
2874   // clflush should always be available on x86_64
2875   // if not we are in real trouble because we rely on it
2876   // to flush the code cache.
2877   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2878   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2879       ext_cpuid1_edx.bits.fxsr != 0))
2880     result |= CPU_FXSR;
2881   // HT flag is set for multi-core processors also.
2882   if (threads_per_core() > 1)
2883     result |= CPU_HT;
2884   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2885       ext_cpuid1_edx.bits.mmx != 0))
2886     result |= CPU_MMX;
2887   if (std_cpuid1_edx.bits.sse != 0)
2888     result |= CPU_SSE;
2889   if (std_cpuid1_edx.bits.sse2 != 0)
2890     result |= CPU_SSE2;
2891   if (std_cpuid1_ecx.bits.sse3 != 0)
2892     result |= CPU_SSE3;
2893   if (std_cpuid1_ecx.bits.ssse3 != 0)
2894     result |= CPU_SSSE3;
2895   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2896     result |= CPU_SSE4_1;
2897   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2898     result |= CPU_SSE4_2;
2899   if (std_cpuid1_ecx.bits.popcnt != 0)
2900     result |= CPU_POPCNT;
2901   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2902       xem_xcr0_eax.bits.apx_f != 0) {
2903     result |= CPU_APX_F;
2904   }
2905   if (std_cpuid1_ecx.bits.avx != 0 &&
2906       std_cpuid1_ecx.bits.osxsave != 0 &&
2907       xem_xcr0_eax.bits.sse != 0 &&
2908       xem_xcr0_eax.bits.ymm != 0) {
2909     result |= CPU_AVX;
2910     result |= CPU_VZEROUPPER;
2911     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2912       result |= CPU_SHA512;
2913     if (std_cpuid1_ecx.bits.f16c != 0)
2914       result |= CPU_F16C;
2915     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2916       result |= CPU_AVX2;
2917       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2918         result |= CPU_AVX_IFMA;
2919     }
2920     if (sef_cpuid7_ecx.bits.gfni != 0)
2921         result |= CPU_GFNI;
2922     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2923         xem_xcr0_eax.bits.opmask != 0 &&
2924         xem_xcr0_eax.bits.zmm512 != 0 &&
2925         xem_xcr0_eax.bits.zmm32 != 0) {
2926       result |= CPU_AVX512F;
2927       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2928         result |= CPU_AVX512CD;
2929       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2930         result |= CPU_AVX512DQ;
2931       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2932         result |= CPU_AVX512_IFMA;
2933       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2934         result |= CPU_AVX512PF;
2935       if (sef_cpuid7_ebx.bits.avx512er != 0)
2936         result |= CPU_AVX512ER;
2937       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2938         result |= CPU_AVX512BW;
2939       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2940         result |= CPU_AVX512VL;
2941       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2942         result |= CPU_AVX512_VPOPCNTDQ;
2943       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2944         result |= CPU_AVX512_VPCLMULQDQ;
2945       if (sef_cpuid7_ecx.bits.vaes != 0)
2946         result |= CPU_AVX512_VAES;
2947       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2948         result |= CPU_AVX512_VNNI;
2949       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2950         result |= CPU_AVX512_BITALG;
2951       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2952         result |= CPU_AVX512_VBMI;
2953       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2954         result |= CPU_AVX512_VBMI2;
2955     }
2956   }
2957   if (std_cpuid1_ecx.bits.hv != 0)
2958     result |= CPU_HV;
2959   if (sef_cpuid7_ebx.bits.bmi1 != 0)
2960     result |= CPU_BMI1;
2961   if (std_cpuid1_edx.bits.tsc != 0)
2962     result |= CPU_TSC;
2963   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
2964     result |= CPU_TSCINV_BIT;
2965   if (std_cpuid1_ecx.bits.aes != 0)
2966     result |= CPU_AES;
2967   if (ext_cpuid1_ecx.bits.lzcnt != 0)
2968     result |= CPU_LZCNT;
2969   if (ext_cpuid1_ecx.bits.prefetchw != 0)
2970     result |= CPU_3DNOW_PREFETCH;
2971   if (sef_cpuid7_ebx.bits.erms != 0)
2972     result |= CPU_ERMS;
2973   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2974     result |= CPU_FSRM;
2975   if (std_cpuid1_ecx.bits.clmul != 0)
2976     result |= CPU_CLMUL;
2977   if (sef_cpuid7_ebx.bits.rtm != 0)
2978     result |= CPU_RTM;
2979   if (sef_cpuid7_ebx.bits.adx != 0)
2980      result |= CPU_ADX;
2981   if (sef_cpuid7_ebx.bits.bmi2 != 0)
2982     result |= CPU_BMI2;
2983   if (sef_cpuid7_ebx.bits.sha != 0)
2984     result |= CPU_SHA;
2985   if (std_cpuid1_ecx.bits.fma != 0)
2986     result |= CPU_FMA;
2987   if (sef_cpuid7_ebx.bits.clflushopt != 0)
2988     result |= CPU_FLUSHOPT;
2989   if (sef_cpuid7_ebx.bits.clwb != 0)
2990     result |= CPU_CLWB;
2991   if (ext_cpuid1_edx.bits.rdtscp != 0)
2992     result |= CPU_RDTSCP;
2993   if (sef_cpuid7_ecx.bits.rdpid != 0)
2994     result |= CPU_RDPID;
2995 
2996   // AMD|Hygon additional features.
2997   if (is_amd_family()) {
2998     // PREFETCHW was checked above, check TDNOW here.
2999     if ((ext_cpuid1_edx.bits.tdnow != 0))
3000       result |= CPU_3DNOW_PREFETCH;
3001     if (ext_cpuid1_ecx.bits.sse4a != 0)
3002       result |= CPU_SSE4A;
3003   }
3004 
3005   // Intel additional features.
3006   if (is_intel()) {
3007     if (sef_cpuid7_edx.bits.serialize != 0)
3008       result |= CPU_SERIALIZE;
3009     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3010       result |= CPU_AVX512_FP16;
3011   }
3012 
3013   // ZX additional features.
3014   if (is_zx()) {
3015     // We do not know if these are supported by ZX, so we cannot trust
3016     // common CPUID bit for them.
3017     assert((result & CPU_CLWB) == 0, "Check if it is supported?");
3018     result &= ~CPU_CLWB;
3019   }
3020 
3021   // Protection key features.
3022   if (sef_cpuid7_ecx.bits.pku != 0) {
3023     result |= CPU_PKU;
3024   }
3025   if (sef_cpuid7_ecx.bits.ospke != 0) {
3026     result |= CPU_OSPKE;
3027   }
3028 
3029   // Control flow enforcement (CET) features.
3030   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3031     result |= CPU_CET_SS;
3032   }
3033   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3034     result |= CPU_CET_IBT;
3035   }
3036 
3037   // Composite features.
3038   if (supports_tscinv_bit() &&
3039       ((is_amd_family() && !is_amd_Barcelona()) ||
3040        is_intel_tsc_synched_at_init())) {
3041     result |= CPU_TSCINV;
3042   }
3043 
3044   return result;
3045 }
3046 
3047 bool VM_Version::os_supports_avx_vectors() {
3048   bool retVal = false;
3049   int nreg = 4;
3050   if (supports_evex()) {
3051     // Verify that OS save/restore all bits of EVEX registers
3052     // during signal processing.
3053     retVal = true;
3054     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3055       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3056         retVal = false;
3057         break;
3058       }
3059     }
3060   } else if (supports_avx()) {
3061     // Verify that OS save/restore all bits of AVX registers
3062     // during signal processing.
3063     retVal = true;
3064     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3065       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3066         retVal = false;
3067         break;
3068       }
3069     }
3070     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3071     if (retVal == false) {
3072       // Verify that OS save/restore all bits of EVEX registers
3073       // during signal processing.
3074       retVal = true;
3075       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3076         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3077           retVal = false;
3078           break;
3079         }
3080       }
3081     }
3082   }
3083   return retVal;
3084 }
3085 
3086 bool VM_Version::os_supports_apx_egprs() {
3087   if (!supports_apx_f()) {
3088     return false;
3089   }
3090   // Enable APX support for product builds after
3091   // completion of planned features listed in JDK-8329030.
3092 #if !defined(PRODUCT)
3093   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3094       _cpuid_info.apx_save[1] != egpr_test_value()) {
3095     return false;
3096   }
3097   return true;
3098 #else
3099   return false;
3100 #endif
3101 }
3102 
3103 uint VM_Version::cores_per_cpu() {
3104   uint result = 1;
3105   if (is_intel()) {
3106     bool supports_topology = supports_processor_topology();
3107     if (supports_topology) {
3108       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3109                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3110     }
3111     if (!supports_topology || result == 0) {
3112       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3113     }
3114   } else if (is_amd_family()) {
3115     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3116   } else if (is_zx()) {
3117     bool supports_topology = supports_processor_topology();
3118     if (supports_topology) {
3119       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3120                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3121     }
3122     if (!supports_topology || result == 0) {
3123       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3124     }
3125   }
3126   return result;
3127 }
3128 
3129 uint VM_Version::threads_per_core() {
3130   uint result = 1;
3131   if (is_intel() && supports_processor_topology()) {
3132     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3133   } else if (is_zx() && supports_processor_topology()) {
3134     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3135   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3136     if (cpu_family() >= 0x17) {
3137       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3138     } else {
3139       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3140                  cores_per_cpu();
3141     }
3142   }
3143   return (result == 0 ? 1 : result);
3144 }
3145 
3146 uint VM_Version::L1_line_size() {
3147   uint result = 0;
3148   if (is_intel()) {
3149     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3150   } else if (is_amd_family()) {
3151     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3152   } else if (is_zx()) {
3153     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3154   }
3155   if (result < 32) // not defined ?
3156     result = 32;   // 32 bytes by default on x86 and other x64
3157   return result;
3158 }
3159 
3160 bool VM_Version::is_intel_tsc_synched_at_init() {
3161   if (is_intel_family_core()) {
3162     uint32_t ext_model = extended_cpu_model();
3163     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3164         ext_model == CPU_MODEL_WESTMERE_EP    ||
3165         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3166         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3167       // <= 2-socket invariant tsc support. EX versions are usually used
3168       // in > 2-socket systems and likely don't synchronize tscs at
3169       // initialization.
3170       // Code that uses tsc values must be prepared for them to arbitrarily
3171       // jump forward or backward.
3172       return true;
3173     }
3174   }
3175   return false;
3176 }
3177 
3178 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3179   // Hardware prefetching (distance/size in bytes):
3180   // Pentium 3 -  64 /  32
3181   // Pentium 4 - 256 / 128
3182   // Athlon    -  64 /  32 ????
3183   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3184   // Core      - 128 /  64
3185   //
3186   // Software prefetching (distance in bytes / instruction with best score):
3187   // Pentium 3 - 128 / prefetchnta
3188   // Pentium 4 - 512 / prefetchnta
3189   // Athlon    - 128 / prefetchnta
3190   // Opteron   - 256 / prefetchnta
3191   // Core      - 256 / prefetchnta
3192   // It will be used only when AllocatePrefetchStyle > 0
3193 
3194   if (is_amd_family()) { // AMD | Hygon
3195     if (supports_sse2()) {
3196       return 256; // Opteron
3197     } else {
3198       return 128; // Athlon
3199     }
3200   } else { // Intel
3201     if (supports_sse3() && cpu_family() == 6) {
3202       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3203         return 192;
3204       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3205         return 384;
3206       }
3207     }
3208     if (supports_sse2()) {
3209       if (cpu_family() == 6) {
3210         return 256; // Pentium M, Core, Core2
3211       } else {
3212         return 512; // Pentium 4
3213       }
3214     } else {
3215       return 128; // Pentium 3 (and all other old CPUs)
3216     }
3217   }
3218 }
3219 
3220 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3221   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3222   switch (id) {
3223   case vmIntrinsics::_floatToFloat16:
3224   case vmIntrinsics::_float16ToFloat:
3225     if (!supports_float16()) {
3226       return false;
3227     }
3228     break;
3229   default:
3230     break;
3231   }
3232   return true;
3233 }