/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/ostream.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME
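
// The table above relies on the X-macro idiom: CPU_FEATURE_FLAGS expands its
// macro argument once per feature. A minimal sketch of the shape assumed here
// (the real list lives in the vm_version header):
//
//   #define CPU_FEATURE_FLAGS(decl) \
//     decl(CX8,  "cx8",  0)         \
//     decl(CMOV, "cmov", 1)         \
//     ...
//
// so DECLARE_CPU_FEATURE_NAME picks out just the quoted name of each entry.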

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64; if not, we are in real
  // trouble because we rely on it to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part of the
  // generation of the code cache flush routine. This happens under
  // Universe::init, before the processor features are set up.
  // Assembler::flush calls this routine to check that clflush is allowed,
  // so we give the caller a free pass while Universe init is still in
  // progress.
  assert((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  return true;
}

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
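
// For reference, a CPUID leaf is queried by loading the function number into
// EAX (and any sub-leaf into ECX) before executing the instruction. Outside
// of stub code the same query could be written with a compiler builtin, e.g.
// (illustrative sketch only, not used by this file):
//
//   #include <cpuid.h>  // GCC/Clang
//   unsigned eax, ebx, ecx, edx;
//   __get_cpuid_count(CPUID_STANDARD_FN_B, 0 /* sub-leaf */,
//                     &eax, &ebx, &ecx, &edx);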

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31
    // during signal handling guarantees that register values observed after
    // the signal were re-instantiated by the operating system, not merely
    // left unmodified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
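
  // Sketch of the intended use (hedged; the actual wiring lives in the
  // platform signal handler): clear_apx_test_state_stub is expected to run
  // while handling the APX probe's SEGV below, so any egpr_test_value()
  // pattern read back from r16/r31 afterwards must have been restored from
  // the signal context by the OS rather than merely left untouched.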

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are the first and second argument registers on Windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);
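
    // The probe above is the classic EFLAGS trick. In C-like pseudocode
    // (read_eflags()/write_eflags() are hypothetical helpers standing in
    // for the pushf/popf pairs):
    //
    //   uint32_t before = read_eflags();
    //   write_eflags(before ^ HS_EFL_AC);        // try to toggle AC (bit 18)
    //   bool is_386 = (read_eflags() == before); // a 386 cannot toggle AC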

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
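
    // Leaf 0xB returns one record per topology level (selected via ECX).
    // The stub treats a level as invalid when both EAX[4:0] (the APIC-ID
    // shift count) and EBX[15:0] (logical processors at this level) are
    // zero. Illustrative decode of a saved record (field names hypothetical):
    //
    //   uint32_t shift = rec.eax & 0x1f;   // APIC-ID shift to the next level
    //   uint32_t count = rec.ebx & 0xffff; // logical processors at this level
    //   bool     valid = (shift | count) != 0;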

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
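
    // XCR0 bit 1 (SSE state) and bit 2 (YMM state) must both be set before
    // AVX registers may be touched, which is what the 0x6 masks below test.
    // Outside of stub code the same check could be written as (illustrative
    // sketch only):
    //
    //   uint64_t xcr0 = _xgetbv(0);         // <immintrin.h>, XCR0
    //   bool os_avx = (xcr0 & 0x6) == 0x6;  // SSE and YMM state enabled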

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
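
    // The values saved at apx_save_offset() are compared against
    // egpr_test_value() later (presumably in os_supports_apx_egprs()); a
    // mismatch means the OS failed to restore extended GPR state across the
    // signal, and UseAPX stays off.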
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if the OS supports AVX state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug: the upper 128/256 bits of YMM/ZMM registers are
    // not restored after signal handling.
    // Generate a SEGV here (reference through null)
    // and check the upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by the caller on Windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG' ("Genu", little-endian)
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use the
    // uncached variant here directly to be able to bootstrap correctly.
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Save callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
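
  // Hypothetical usage sketch: hypervisors expose a vendor signature at
  // CPUID leaf 0x40000000, so a caller might do
  //
  //   uint32_t regs[4];
  //   detect_virt_stub(0x40000000, regs);  // regs = {eax, ebx, ecx, edx}
  //
  // and then match "KVMKVMKVM", "VMwareVMware", etc. in ebx/ecx/edx.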

  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are the first and second argument registers on Windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features; // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

  // The OS should support SSE for x64, and the hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // In 64-bit mode, SSE2 is the minimum.
  if (UseSSE < 2) UseSSE = 2;

  // flush_icache_stub has to be generated first.
  // That is why the ICache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is the line size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");

  // Assigning this field effectively enables Unsafe.writebackMemory() by
  // initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to a non-zero
  // value. It is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // Publish the data cache line flush size to the generic field; otherwise
    // let it default to zero, thereby disabling writeback.
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
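
  // For example, the common case of 64-byte cache lines is reported as
  // clflush_size == 8, so the published value is 8 quadwords * 8 bytes = 64.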

  // Check if the processor has Intel E-cores
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
      _model == 0xCC || _model == 0xDD)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features.clear_feature(CPU_SSE4_1);
    _features.clear_feature(CPU_SSE4_2);
  }

  if (UseSSE < 3) {
    _features.clear_feature(CPU_SSE3);
    _features.clear_feature(CPU_SSSE3);
    _features.clear_feature(CPU_SSE4A);
  }

  if (UseSSE < 2)
    _features.clear_feature(CPU_SSE2);

  if (UseSSE < 1)
    _features.clear_feature(CPU_SSE);

  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }
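
  // Worked example (hypothetical CPU with SSE3 but not SSE4.1): -XX:UseSSE=4
  // yields use_sse_limit == 3, so a warning is printed and UseSSE is clamped
  // to 3; with the flag left at its default, UseSSE is set to 3 silently.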

  // first try the initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }
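
  // Worked example: on a Skylake server part with stepping < 5 and no
  // explicit -XX:UseAVX, the default is capped at 2 even when use_avx_limit
  // is 3; an explicit -XX:UseAVX=3 is still honored on such parts.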

  if (UseAVX < 3) {
    _features.clear_feature(CPU_AVX512F);
    _features.clear_feature(CPU_AVX512DQ);
    _features.clear_feature(CPU_AVX512CD);
    _features.clear_feature(CPU_AVX512BW);
    _features.clear_feature(CPU_AVX512ER);
    _features.clear_feature(CPU_AVX512PF);
    _features.clear_feature(CPU_AVX512VL);
    _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
    _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
    _features.clear_feature(CPU_AVX512_VAES);
    _features.clear_feature(CPU_AVX512_VNNI);
    _features.clear_feature(CPU_AVX512_VBMI);
    _features.clear_feature(CPU_AVX512_VBMI2);
    _features.clear_feature(CPU_AVX512_BITALG);
    _features.clear_feature(CPU_AVX512_IFMA);
    _features.clear_feature(CPU_APX_F);
    _features.clear_feature(CPU_AVX512_FP16);
    _features.clear_feature(CPU_AVX10_1);
    _features.clear_feature(CPU_AVX10_2);
  }

  if (UseAVX < 2) {
    _features.clear_feature(CPU_AVX2);
    _features.clear_feature(CPU_AVX_IFMA);
  }

  if (UseAVX < 1) {
    _features.clear_feature(CPU_AVX);
    _features.clear_feature(CPU_VZEROUPPER);
    _features.clear_feature(CPU_F16C);
    _features.clear_feature(CPU_SHA512);
  }

  if (logical_processors_per_package() == 1) {
    // An HT processor could be installed on a system which doesn't support HT.
    _features.clear_feature(CPU_HT);
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features.clear_feature(CPU_VZEROUPPER);
      _features.clear_feature(CPU_AVX512BW);
      _features.clear_feature(CPU_AVX512VL);
      _features.clear_feature(CPU_APX_F);
      _features.clear_feature(CPU_AVX512DQ);
      _features.clear_feature(CPU_AVX512_VNNI);
      _features.clear_feature(CPU_AVX512_VAES);
      _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
      _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
      _features.clear_feature(CPU_AVX512_VBMI);
      _features.clear_feature(CPU_AVX512_VBMI2);
      _features.clear_feature(CPU_CLWB);
      _features.clear_feature(CPU_FLUSHOPT);
      _features.clear_feature(CPU_GFNI);
      _features.clear_feature(CPU_AVX512_BITALG);
      _features.clear_feature(CPU_AVX512_IFMA);
      _features.clear_feature(CPU_AVX_IFMA);
      _features.clear_feature(CPU_AVX512_FP16);
      _features.clear_feature(CPU_AVX10_1);
      _features.clear_feature(CPU_AVX10_2);
    }
  }

  // Currently, APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  }

  if (!UseAPX) {
    _features.clear_feature(CPU_APX_F);
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
    FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  assert(supports_clflush(), "Always present");
  if (X86ICacheSync == -1) {
    // Auto-detect, choosing the most performant option that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
    if (supports_clwb()) {
      FLAG_SET_ERGO(X86ICacheSync, 3);
    } else if (supports_clflushopt()) {
      FLAG_SET_ERGO(X86ICacheSync, 2);
    } else {
      FLAG_SET_ERGO(X86ICacheSync, 1);
    }
  } else {
    if ((X86ICacheSync == 2) && !supports_clflushopt()) {
      vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
    }
    if ((X86ICacheSync == 3) && !supports_clwb()) {
      vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
    }
    if ((X86ICacheSync == 5) && !supports_serialize()) {
      vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
    }
  }
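
  // For reference (see the flag definition in globals_x86.hpp): 0 = no
  // flush, 1 = CLFLUSH, 2 = CLFLUSHOPT, 3 = CLWB, 4 = CPUID, 5 = SERIALIZE;
  // -1 selects the auto-detection above.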

  stringStream ss(2048);
  if (supports_hybrid()) {
    ss.print("(hybrid)");
  } else {
    ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
  }
  ss.print(" family %d model %d stepping %d microcode 0x%x",
           cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  ss.print(", ");
  int features_offset = (int)ss.size();
  insert_features_names(_features, ss);

  _cpu_info_string = ss.as_string(true);
  _features_string = _cpu_info_string + features_offset;

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 intrinsics require AVX2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsics require CLMUL and SSE3 instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function.  StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsics require AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }

  // Kyber Intrinsics
  // Currently we only have them for AVX512
#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
      UseKyberIntrinsics = true;
    }
  } else
#endif
  if (UseKyberIntrinsics) {
    warning("Intrinsics for ML-KEM are not available on this CPU.");
    FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
  }

  // Dilithium Intrinsics
  // Currently we only have them for AVX512
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
      UseDilithiumIntrinsics = true;
    }
  } else if (UseDilithiumIntrinsics) {
    warning("Intrinsics for ML-DSA are not available on this CPU.");
    FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsics require AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma()) {
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() || (supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX3
    max_vector_size = 64;
  }

  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64-bit

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use the highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }
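
  // Worked example: with UseAVX == 2 the cap is 32 bytes; -XX:MaxVectorSize=48
  // trips the "at most" check and is reset to 32, while a value such as 24
  // passes the range checks but fails is_power_of_2() and is also reset to 32.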

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 4;
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#endif // COMPILER2_OR_JVMCI
1441 
  // On newer CPUs, instructions that update the whole XMM register should be
  // used to prevent partial-register stalls due to dependencies on the high half.
1444   //
1445   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1446   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1447   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1448   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
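  //
  // The load form of movsd zeroes the upper 64 bits of the destination XMM
  // register, and movaps/movapd copy all 128 bits, so neither depends on the
  // register's previous contents. movlpd and reg-to-reg movss/movsd merge
  // into the low half only, leaving a dependency on the old upper half.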
1449 
1450 
1451   if (is_zx()) { // ZX cpus specific settings
1452     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1453       UseStoreImmI16 = false; // don't use it on ZX cpus
1454     }
1455     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1456       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1457         // Use it on all ZX cpus
1458         UseAddressNop = true;
1459       }
1460     }
1461     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1462       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1463     }
1464     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1465       if (supports_sse3()) {
1466         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1467       } else {
1468         UseXmmRegToRegMoveAll = false;
1469       }
1470     }
1471     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1472 #ifdef COMPILER2
1473       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus apply the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left in the current fetch line (OptoLoopAlignment), i.e. at least
        // NumberOfLoopInstrToAlign (defined in c2_globals.hpp), or if the
        // padding would be big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new ZX cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1482         MaxLoopPad = 11;
1483       }
1484 #endif // COMPILER2
1485       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1486         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1487       }
1488       if (supports_sse4_2()) { // new ZX cpus
1489         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1490           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1491         }
1492       }
1493     }
1494 
1495     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1496       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1497     }
1498   }
1499 
1500   if (is_amd_family()) { // AMD cpus specific settings
1501     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1502       // Use it on new AMD cpus starting from Opteron.
1503       UseAddressNop = true;
1504     }
1505     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1506       // Use it on new AMD cpus starting from Opteron.
1507       UseNewLongLShift = true;
1508     }
1509     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1510       if (supports_sse4a()) {
1511         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1512       } else {
1513         UseXmmLoadAndClearUpper = false;
1514       }
1515     }
1516     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1517       if (supports_sse4a()) {
1518         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1519       } else {
1520         UseXmmRegToRegMoveAll = false;
1521       }
1522     }
1523     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1524       if (supports_sse4a()) {
1525         UseXmmI2F = true;
1526       } else {
1527         UseXmmI2F = false;
1528       }
1529     }
1530     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1531       if (supports_sse4a()) {
1532         UseXmmI2D = true;
1533       } else {
1534         UseXmmI2D = false;
1535       }
1536     }
1537 
1538     // some defaults for AMD family 15h
1539     if (cpu_family() == 0x15) {
1540       // On family 15h processors default is no sw prefetch
1541       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1542         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1543       }
1544       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1545       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1546         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1547       }
1548       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1549       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1550         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1551       }
1552       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1553         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1554       }
1555     }
1556 
1557 #ifdef COMPILER2
1558     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1559       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1560       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1561     }
1562 #endif // COMPILER2
1563 
1564     // Some defaults for AMD family >= 17h && Hygon family 18h
1565     if (cpu_family() >= 0x17) {
1566       // On family >=17h processors use XMM and UnalignedLoadStores
1567       // for Array Copy
1568       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1569         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1570       }
1571       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1572         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1573       }
1574 #ifdef COMPILER2
1575       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1576         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1577       }
1578 #endif
1579     }
1580   }
1581 
1582   if (is_intel()) { // Intel cpus specific settings
1583     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1584       UseStoreImmI16 = false; // don't use it on Intel cpus
1585     }
1586     if (is_intel_server_family() || cpu_family() == 15) {
1587       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1588         // Use it on all Intel cpus starting from PentiumPro
1589         UseAddressNop = true;
1590       }
1591     }
1592     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1593       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1594     }
1595     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1596       if (supports_sse3()) {
1597         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1598       } else {
1599         UseXmmRegToRegMoveAll = false;
1600       }
1601     }
1602     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1603 #ifdef COMPILER2
1604       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus apply the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left in the current fetch line (OptoLoopAlignment), i.e. at least
        // NumberOfLoopInstrToAlign (defined in c2_globals.hpp), or if the
        // padding would be big (> MaxLoopPad).
        // Set MaxLoopPad to 11 for new Intel cpus to reduce the number of
        // generated NOP instructions. 11 is the largest size of one
        // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1613         MaxLoopPad = 11;
1614       }
1615 #endif // COMPILER2
1616 
1617       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1618         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1619       }
1620       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1621         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1622           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1623         }
1624       }
1625     }
1626     if (is_atom_family() || is_knights_family()) {
1627 #ifdef COMPILER2
1628       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1629         OptoScheduling = true;
1630       }
1631 #endif
1632       if (supports_sse4_2()) { // Silvermont
1633         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1634           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1635         }
1636       }
1637       if (FLAG_IS_DEFAULT(UseIncDec)) {
1638         FLAG_SET_DEFAULT(UseIncDec, false);
1639       }
1640     }
1641     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1642       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1643     }
1644 #ifdef COMPILER2
1645     if (UseAVX > 2) {
1646       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1647           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1648            ArrayOperationPartialInlineSize != 0 &&
1649            ArrayOperationPartialInlineSize != 16 &&
1650            ArrayOperationPartialInlineSize != 32 &&
1651            ArrayOperationPartialInlineSize != 64)) {
1652         int inline_size = 0;
1653         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1654           inline_size = 64;
1655         } else if (MaxVectorSize >= 32) {
1656           inline_size = 32;
1657         } else if (MaxVectorSize >= 16) {
1658           inline_size = 16;
1659         }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
          warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
1662         }
1663         ArrayOperationPartialInlineSize = inline_size;
1664       }
1665 
1666       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1667         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1668         if (ArrayOperationPartialInlineSize) {
1669           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1670         } else {
1671           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1672         }
1673       }
1674     }
1675 #endif
1676   }
1677 
1678 #ifdef COMPILER2
1679   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1680     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1681       OptimizeFill = false;
1682     }
1683   }
1684 #endif
1685   if (supports_sse4_2()) {
1686     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1687       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1688     }
1689   } else {
1690     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1691       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1692     }
1693     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1694   }
1695   if (UseSSE42Intrinsics) {
1696     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1697       UseVectorizedMismatchIntrinsic = true;
1698     }
1699   } else if (UseVectorizedMismatchIntrinsic) {
1700     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1701       warning("vectorizedMismatch intrinsics are not available on this CPU");
1702     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1703   }
1704   if (UseAVX >= 2) {
1705     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1706   } else if (UseVectorizedHashCodeIntrinsic) {
1707     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1708       warning("vectorizedHashCode intrinsics are not available on this CPU");
1709     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1710   }
1711 
  // Use the count-leading-zeros instruction (lzcnt) if available.
1713   if (supports_lzcnt()) {
1714     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1715       UseCountLeadingZerosInstruction = true;
1716     }
  } else if (UseCountLeadingZerosInstruction) {
1718     warning("lzcnt instruction is not available on this CPU");
1719     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1720   }
1721 
  // Use the count-trailing-zeros instruction (tzcnt) if available
1723   if (supports_bmi1()) {
1724     // tzcnt does not require VEX prefix
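    // Note: the tzcnt opcode (F3 0F BC) decodes as plain bsf on CPUs
    // without BMI1, which is why it can be emitted without a VEX prefix.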
1725     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1726       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1727         // Don't use tzcnt if BMI1 is switched off on command line.
1728         UseCountTrailingZerosInstruction = false;
1729       } else {
1730         UseCountTrailingZerosInstruction = true;
1731       }
1732     }
1733   } else if (UseCountTrailingZerosInstruction) {
1734     warning("tzcnt instruction is not available on this CPU");
1735     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1736   }
1737 
1738   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1739   // VEX prefix is generated only when AVX > 0.
1740   if (supports_bmi1() && supports_avx()) {
1741     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1742       UseBMI1Instructions = true;
1743     }
1744   } else if (UseBMI1Instructions) {
1745     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1746     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1747   }
1748 
1749   if (supports_bmi2() && supports_avx()) {
1750     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1751       UseBMI2Instructions = true;
1752     }
1753   } else if (UseBMI2Instructions) {
1754     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1755     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1756   }
1757 
1758   // Use population count instruction if available.
1759   if (supports_popcnt()) {
1760     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1761       UsePopCountInstruction = true;
1762     }
1763   } else if (UsePopCountInstruction) {
1764     warning("POPCNT instruction is not available on this CPU");
1765     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1766   }
1767 
1768   // Use fast-string operations if available.
1769   if (supports_erms()) {
1770     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1771       UseFastStosb = true;
1772     }
1773   } else if (UseFastStosb) {
1774     warning("fast-string operations are not available on this CPU");
1775     FLAG_SET_DEFAULT(UseFastStosb, false);
1776   }
1777 
  // For AMD processors, use XMM/YMM MOVDQU instructions
  // for object initialization by default
1780   if (is_amd() && cpu_family() >= 0x19) {
1781     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1782       UseFastStosb = false;
1783     }
1784   }
1785 
1786 #ifdef COMPILER2
1787   if (is_intel() && MaxVectorSize > 16) {
1788     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1789       UseFastStosb = false;
1790     }
1791   }
1792 #endif
1793 
1794   // Use XMM/YMM MOVDQU instruction for Object Initialization
1795   if (UseUnalignedLoadStores) {
1796     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1797       UseXMMForObjInit = true;
1798     }
1799   } else if (UseXMMForObjInit) {
1800     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1801     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1802   }
1803 
1804 #ifdef COMPILER2
1805   if (FLAG_IS_DEFAULT(AlignVector)) {
1806     // Modern processors allow misaligned memory operations for vectors.
1807     AlignVector = !UseUnalignedLoadStores;
1808   }
1809 #endif // COMPILER2
1810 
1811   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1812     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1813       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1814     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1815       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1816     }
1817   }
1818 
1819   // Allocation prefetch settings
1820   int cache_line_size = checked_cast<int>(prefetch_data_size());
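  // Step at least one cache line per prefetch so that consecutive
  // allocation prefetches touch distinct cache lines.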
1821   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1822       (cache_line_size > AllocatePrefetchStepSize)) {
1823     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1824   }
1825 
1826   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1827     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1828     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1829       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1830     }
1831     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1832   }
1833 
1834   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1835     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1836     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1837   }
1838 
1839   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1840     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1841         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1842       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1843     }
1844 #ifdef COMPILER2
1845     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1846       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1847     }
1848 #endif
1849   }
1850 
1851   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1852 #ifdef COMPILER2
1853     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1854       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1855     }
1856 #endif
1857   }
1858 
1859   // Prefetch settings
1860 
1861   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
  // 50-warehouse specjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
1863   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1864   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1865 
1866   // gc copy/scan is disabled if prefetchw isn't supported, because
1867   // Prefetch::write emits an inlined prefetchw on Linux.
1868   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1869   // The used prefetcht0 instruction works for both amd64 and em64t.
1870 
1871   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1872     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1873   }
1874   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1875     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1876   }
1877 
1878   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1879      (cache_line_size > ContendedPaddingWidth))
1880      ContendedPaddingWidth = cache_line_size;
1881 
1882   // This machine allows unaligned memory accesses
1883   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1884     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1885   }
1886 
1887 #ifndef PRODUCT
1888   if (log_is_enabled(Info, os, cpu)) {
1889     LogStream ls(Log(os, cpu)::info());
1890     outputStream* log = &ls;
1891     log->print_cr("Logical CPUs per core: %u",
1892                   logical_processors_per_package());
1893     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1894     log->print("UseSSE=%d", UseSSE);
1895     if (UseAVX > 0) {
1896       log->print("  UseAVX=%d", UseAVX);
1897     }
1898     if (UseAES) {
1899       log->print("  UseAES=1");
1900     }
1901 #ifdef COMPILER2
1902     if (MaxVectorSize > 0) {
1903       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1904     }
1905 #endif
1906     log->cr();
1907     log->print("Allocation");
1908     if (AllocatePrefetchStyle <= 0) {
1909       log->print_cr(": no prefetching");
1910     } else {
1911       log->print(" prefetching: ");
1912       if (AllocatePrefetchInstr == 0) {
1913         log->print("PREFETCHNTA");
1914       } else if (AllocatePrefetchInstr == 1) {
1915         log->print("PREFETCHT0");
1916       } else if (AllocatePrefetchInstr == 2) {
1917         log->print("PREFETCHT2");
1918       } else if (AllocatePrefetchInstr == 3) {
1919         log->print("PREFETCHW");
1920       }
1921       if (AllocatePrefetchLines > 1) {
1922         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1923       } else {
1924         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1925       }
1926     }
1927 
1928     if (PrefetchCopyIntervalInBytes > 0) {
1929       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1930     }
1931     if (PrefetchScanIntervalInBytes > 0) {
1932       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1933     }
1934     if (ContendedPaddingWidth > 0) {
1935       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1936     }
1937   }
1938 #endif // !PRODUCT
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
    FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
    FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
1945 }
1946 
1947 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1948   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1949   if (vrt == XenHVM) {
1950     st->print_cr("Xen hardware-assisted virtualization detected");
1951   } else if (vrt == KVM) {
1952     st->print_cr("KVM virtualization detected");
1953   } else if (vrt == VMWare) {
1954     st->print_cr("VMWare virtualization detected");
1955     VirtualizationSupport::print_virtualization_info(st);
1956   } else if (vrt == HyperV) {
1957     st->print_cr("Hyper-V virtualization detected");
1958   } else if (vrt == HyperVRole) {
1959     st->print_cr("Hyper-V role detected");
1960   }
1961 }
1962 
1963 bool VM_Version::compute_has_intel_jcc_erratum() {
1964   if (!is_intel_family_core()) {
1965     // Only Intel CPUs are affected.
1966     return false;
1967   }
1968   // The following table of affected CPUs is based on the following document released by Intel:
1969   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
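  // On affected parts, the microcode update that mitigates the erratum
  // penalizes jump instructions that cross or end on a 32-byte boundary,
  // so generated code is padded to keep conditional branches away from
  // those boundaries.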
1970   switch (_model) {
1971   case 0x8E:
1972     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1973     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1974     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1975     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1976     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1977     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1978     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1979     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1980     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1981     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1982   case 0x4E:
1983     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1984     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1985     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1986     return _stepping == 0x3;
1987   case 0x55:
1988     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1989     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1990     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1991     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1992     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1993     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1994     return _stepping == 0x4 || _stepping == 0x7;
1995   case 0x5E:
1996     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1997     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1998     return _stepping == 0x3;
1999   case 0x9E:
2000     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2001     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2002     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2003     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2004     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2005     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2006     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2007     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2008     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2009     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2010     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2011     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
2013     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2014     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2015   case 0xA5:
2016     // Not in Intel documentation.
2017     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2018     return true;
2019   case 0xA6:
2020     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2021     return _stepping == 0x0;
2022   case 0xAE:
2023     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2024     return _stepping == 0xA;
2025   default:
    // If we are running on another Intel machine not recognized in the table, we are okay.
2027     return false;
2028   }
2029 }
2030 
2031 // On Xen, the cpuid instruction returns
2032 //  eax / registers[0]: Version of Xen
2033 //  ebx / registers[1]: chars 'XenV'
2034 //  ecx / registers[2]: chars 'MMXe'
2035 //  edx / registers[3]: chars 'nVMM'
2036 //
2037 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2038 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2039 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2040 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2041 //
2042 // more information :
2043 // https://kb.vmware.com/s/article/1009458
2044 //
2045 void VM_Version::check_virtualizations() {
2046   uint32_t registers[4] = {0};
2047   char signature[13] = {0};
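  // registers[1..3] (ebx, ecx, edx) are contiguous, so the 12 signature
  // characters are copied with a single memcpy; zero-initialization keeps
  // the string NUL-terminated.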
2048 
  // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
  // from 0x40000000, up to 0x40010000.
2051   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2052   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2053     detect_virt_stub(leaf, registers);
2054     memcpy(signature, &registers[1], 12);
2055 
2056     if (strncmp("VMwareVMware", signature, 12) == 0) {
2057       Abstract_VM_Version::_detected_virtualization = VMWare;
2058       // check for extended metrics from guestlib
2059       VirtualizationSupport::initialize();
2060     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2061       Abstract_VM_Version::_detected_virtualization = HyperV;
2062 #ifdef _WINDOWS
2063       // CPUID leaf 0x40000007 is available to the root partition only.
2064       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2065       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2066       detect_virt_stub(0x40000007, registers);
2067       if ((registers[0] != 0x0) ||
2068           (registers[1] != 0x0) ||
2069           (registers[2] != 0x0) ||
2070           (registers[3] != 0x0)) {
2071         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2072       }
2073 #endif
2074     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2075       Abstract_VM_Version::_detected_virtualization = KVM;
2076     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2077       Abstract_VM_Version::_detected_virtualization = XenHVM;
2078     }
2079   }
2080 }
2081 
2082 #ifdef COMPILER2
2083 // Determine if it's running on Cascade Lake using default options.
2084 bool VM_Version::is_default_intel_cascade_lake() {
2085   return FLAG_IS_DEFAULT(UseAVX) &&
2086          FLAG_IS_DEFAULT(MaxVectorSize) &&
2087          UseAVX > 2 &&
2088          is_intel_cascade_lake();
2089 }
2090 #endif
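
// Cascade Lake reuses the Skylake server model number (0x55); steppings 5
// and above distinguish it from earlier Skylake server parts.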
2091 
2092 bool VM_Version::is_intel_cascade_lake() {
2093   return is_intel_skylake() && _stepping >= 5;
2094 }
2095 
2096 bool VM_Version::is_intel_darkmont() {
2097   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2098 }
2099 
2100 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2101 // for implementing the array copy and clear operations.
// Intel platforms that support the serialize instruction have an improved
// implementation of 64-byte load/stores, so the default threshold is set
// to 0 for these platforms.
2105 int VM_Version::avx3_threshold() {
2106   return (is_intel_server_family() &&
2107           supports_serialize() &&
2108           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2109 }
2110 
2111 void VM_Version::clear_apx_test_state() {
2112   clear_apx_test_state_stub();
2113 }
2114 
2115 static bool _vm_version_initialized = false;
2116 
2117 void VM_Version::initialize() {
2118   ResourceMark rm;
2119 
  // Creating this stub must be the FIRST use of the assembler
2121   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2122   if (stub_blob == nullptr) {
2123     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2124   }
2125   CodeBuffer c(stub_blob);
2126   VM_Version_StubGenerator g(&c);
2127 
2128   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2129                                      g.generate_get_cpu_info());
2130   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2131                                      g.generate_detect_virt());
2132   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2133                                      g.clear_apx_test_state());
2134   get_processor_features();
2135 
2136   Assembler::precompute_instructions();
2137 
2138   if (VM_Version::supports_hv()) { // Supports hypervisor
2139     check_virtualizations();
2140   }
2141   _vm_version_initialized = true;
2142 }
2143 
2144 typedef enum {
2145    CPU_FAMILY_8086_8088  = 0,
2146    CPU_FAMILY_INTEL_286  = 2,
2147    CPU_FAMILY_INTEL_386  = 3,
2148    CPU_FAMILY_INTEL_486  = 4,
2149    CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family, several models
2151    CPU_FAMILY_PENTIUM_4  = 0xF
2152 } FamilyFlag;
2153 
2154 typedef enum {
2155   RDTSCP_FLAG  = 0x08000000, // bit 27
2156   INTEL64_FLAG = 0x20000000  // bit 29
2157 } _featureExtendedEdxFlag;
2158 
2159 typedef enum {
2160    FPU_FLAG     = 0x00000001,
2161    VME_FLAG     = 0x00000002,
2162    DE_FLAG      = 0x00000004,
2163    PSE_FLAG     = 0x00000008,
2164    TSC_FLAG     = 0x00000010,
2165    MSR_FLAG     = 0x00000020,
2166    PAE_FLAG     = 0x00000040,
2167    MCE_FLAG     = 0x00000080,
2168    CX8_FLAG     = 0x00000100,
2169    APIC_FLAG    = 0x00000200,
2170    SEP_FLAG     = 0x00000800,
2171    MTRR_FLAG    = 0x00001000,
2172    PGE_FLAG     = 0x00002000,
2173    MCA_FLAG     = 0x00004000,
2174    CMOV_FLAG    = 0x00008000,
2175    PAT_FLAG     = 0x00010000,
2176    PSE36_FLAG   = 0x00020000,
2177    PSNUM_FLAG   = 0x00040000,
2178    CLFLUSH_FLAG = 0x00080000,
2179    DTS_FLAG     = 0x00200000,
2180    ACPI_FLAG    = 0x00400000,
2181    MMX_FLAG     = 0x00800000,
2182    FXSR_FLAG    = 0x01000000,
2183    SSE_FLAG     = 0x02000000,
2184    SSE2_FLAG    = 0x04000000,
2185    SS_FLAG      = 0x08000000,
2186    HTT_FLAG     = 0x10000000,
2187    TM_FLAG      = 0x20000000
2188 } FeatureEdxFlag;
2189 
2190 static BufferBlob* cpuid_brand_string_stub_blob;
2191 static const int   cpuid_brand_string_stub_size = 550;
2192 
2193 extern "C" {
2194   typedef void (*getCPUIDBrandString_stub_t)(void*);
2195 }
2196 
2197 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2198 
2199 // VM_Version statics
2200 enum {
2201   ExtendedFamilyIdLength_INTEL = 16,
2202   ExtendedFamilyIdLength_AMD   = 24
2203 };
2204 
2205 const size_t VENDOR_LENGTH = 13;
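// Extended brand string: 3 CPUID leaves (0x80000002..0x80000004), each
// returning 4 registers of 4 bytes, plus a terminating NUL.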
2206 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2207 static char* _cpu_brand_string = nullptr;
2208 static int64_t _max_qualified_cpu_frequency = 0;
2209 
2210 static int _no_of_threads = 0;
2211 static int _no_of_cores = 0;
2212 
2213 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2214   "8086/8088",
2215   "",
2216   "286",
2217   "386",
2218   "486",
2219   "Pentium",
2220   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2221   "",
2222   "",
2223   "",
2224   "",
2225   "",
2226   "",
2227   "",
2228   "",
2229   "Pentium 4"
2230 };
2231 
2232 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2233   "",
2234   "",
2235   "",
2236   "",
2237   "5x86",
2238   "K5/K6",
2239   "Athlon/AthlonXP",
2240   "",
2241   "",
2242   "",
2243   "",
2244   "",
2245   "",
2246   "",
2247   "",
2248   "Opteron/Athlon64",
2249   "Opteron QC/Phenom",  // Barcelona et.al.
2250   "",
2251   "",
2252   "",
2253   "",
2254   "",
2255   "",
2256   "Zen"
2257 };
2258 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2259 // September 2013, Vol 3C Table 35-1
2260 const char* const _model_id_pentium_pro[] = {
2261   "",
2262   "Pentium Pro",
2263   "",
2264   "Pentium II model 3",
2265   "",
2266   "Pentium II model 5/Xeon/Celeron",
2267   "Celeron",
2268   "Pentium III/Pentium III Xeon",
2269   "Pentium III/Pentium III Xeon",
2270   "Pentium M model 9",    // Yonah
2271   "Pentium III, model A",
2272   "Pentium III, model B",
2273   "",
2274   "Pentium M model D",    // Dothan
2275   "",
2276   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2277   "",
2278   "",
2279   "",
2280   "",
2281   "",
2282   "",
2283   "Celeron",              // 0x16 Celeron 65nm
2284   "Core 2",               // 0x17 Penryn / Harpertown
2285   "",
2286   "",
2287   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2288   "Atom",                 // 0x1B Z5xx series Silverthorn
2289   "",
2290   "Core 2",               // 0x1D Dunnington (6-core)
2291   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2292   "",
2293   "",
2294   "",
2295   "",
2296   "",
2297   "",
2298   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2299   "",
2300   "",
2301   "",                     // 0x28
2302   "",
2303   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2304   "",
2305   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2306   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2307   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2308   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2309   "",
2310   "",
2311   "",
2312   "",
2313   "",
2314   "",
2315   "",
2316   "",
2317   "",
2318   "",
2319   "Ivy Bridge",           // 0x3a
2320   "",
2321   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2322   "",                     // 0x3d "Next Generation Intel Core Processor"
2323   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2324   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2325   "",
2326   "",
2327   "",
2328   "",
2329   "",
2330   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2331   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2332   nullptr
2333 };
2334 
/* Brand ID is for backward compatibility;
 * newer CPUs use the extended brand string */
2337 const char* const _brand_id[] = {
2338   "",
2339   "Celeron processor",
2340   "Pentium III processor",
2341   "Intel Pentium III Xeon processor",
2342   "",
2343   "",
2344   "",
2345   "",
2346   "Intel Pentium 4 processor",
2347   nullptr
2348 };
2349 
2350 
2351 const char* const _feature_edx_id[] = {
2352   "On-Chip FPU",
2353   "Virtual Mode Extensions",
2354   "Debugging Extensions",
2355   "Page Size Extensions",
2356   "Time Stamp Counter",
2357   "Model Specific Registers",
2358   "Physical Address Extension",
2359   "Machine Check Exceptions",
2360   "CMPXCHG8B Instruction",
2361   "On-Chip APIC",
2362   "",
2363   "Fast System Call",
2364   "Memory Type Range Registers",
2365   "Page Global Enable",
2366   "Machine Check Architecture",
2367   "Conditional Mov Instruction",
2368   "Page Attribute Table",
2369   "36-bit Page Size Extension",
2370   "Processor Serial Number",
2371   "CLFLUSH Instruction",
2372   "",
2373   "Debug Trace Store feature",
2374   "ACPI registers in MSR space",
2375   "Intel Architecture MMX Technology",
2376   "Fast Float Point Save and Restore",
2377   "Streaming SIMD extensions",
2378   "Streaming SIMD extensions 2",
2379   "Self-Snoop",
2380   "Hyper Threading",
2381   "Thermal Monitor",
2382   "",
2383   "Pending Break Enable"
2384 };
2385 
2386 const char* const _feature_extended_edx_id[] = {
2387   "",
2388   "",
2389   "",
2390   "",
2391   "",
2392   "",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "SYSCALL/SYSRET",
2399   "",
2400   "",
2401   "",
2402   "",
2403   "",
2404   "",
2405   "",
2406   "",
2407   "Execute Disable Bit",
2408   "",
2409   "",
2410   "",
2411   "",
2412   "",
2413   "",
2414   "RDTSCP",
2415   "",
2416   "Intel 64 Architecture",
2417   "",
2418   ""
2419 };
2420 
2421 const char* const _feature_ecx_id[] = {
2422   "Streaming SIMD Extensions 3",
2423   "PCLMULQDQ",
2424   "64-bit DS Area",
2425   "MONITOR/MWAIT instructions",
2426   "CPL Qualified Debug Store",
2427   "Virtual Machine Extensions",
2428   "Safer Mode Extensions",
2429   "Enhanced Intel SpeedStep technology",
2430   "Thermal Monitor 2",
2431   "Supplemental Streaming SIMD Extensions 3",
2432   "L1 Context ID",
2433   "",
2434   "Fused Multiply-Add",
2435   "CMPXCHG16B",
2436   "xTPR Update Control",
2437   "Perfmon and Debug Capability",
2438   "",
2439   "Process-context identifiers",
2440   "Direct Cache Access",
2441   "Streaming SIMD extensions 4.1",
2442   "Streaming SIMD extensions 4.2",
2443   "x2APIC",
2444   "MOVBE",
2445   "Popcount instruction",
2446   "TSC-Deadline",
2447   "AESNI",
2448   "XSAVE",
2449   "OSXSAVE",
2450   "AVX",
2451   "F16C",
2452   "RDRAND",
2453   ""
2454 };
2455 
2456 const char* const _feature_extended_ecx_id[] = {
2457   "LAHF/SAHF instruction support",
2458   "Core multi-processor legacy mode",
2459   "",
2460   "",
2461   "",
2462   "Advanced Bit Manipulations: LZCNT",
2463   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2464   "Misaligned SSE mode",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   "",
2484   "",
2485   "",
2486   "",
2487   "",
2488   ""
2489 };
2490 
2491 void VM_Version::initialize_tsc(void) {
2492   ResourceMark rm;
2493 
2494   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2495   if (cpuid_brand_string_stub_blob == nullptr) {
2496     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2497   }
2498   CodeBuffer c(cpuid_brand_string_stub_blob);
2499   VM_Version_StubGenerator g(&c);
2500   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2501                                    g.generate_getCPUIDBrandString());
2502 }
2503 
2504 const char* VM_Version::cpu_model_description(void) {
2505   uint32_t cpu_family = extended_cpu_family();
2506   uint32_t cpu_model = extended_cpu_model();
2507   const char* model = nullptr;
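
  // Walk the table up to cpu_model; the nullptr sentinel stops the scan,
  // so models past the end of the table report no description.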
2508 
2509   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2510     for (uint32_t i = 0; i <= cpu_model; i++) {
2511       model = _model_id_pentium_pro[i];
2512       if (model == nullptr) {
2513         break;
2514       }
2515     }
2516   }
2517   return model;
2518 }
2519 
2520 const char* VM_Version::cpu_brand_string(void) {
2521   if (_cpu_brand_string == nullptr) {
2522     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2523     if (nullptr == _cpu_brand_string) {
2524       return nullptr;
2525     }
2526     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2527     if (ret_val != OS_OK) {
2528       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2529       _cpu_brand_string = nullptr;
2530     }
2531   }
2532   return _cpu_brand_string;
2533 }
2534 
2535 const char* VM_Version::cpu_brand(void) {
2536   const char*  brand  = nullptr;
2537 
2538   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2539     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
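    // Scan forward through the table; the nullptr sentinel stops the walk,
    // so an out-of-range brand id reports no brand.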
2540     brand = _brand_id[0];
2541     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2542       brand = _brand_id[i];
2543     }
2544   }
2545   return brand;
2546 }
2547 
2548 bool VM_Version::cpu_is_em64t(void) {
2549   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2550 }
2551 
2552 bool VM_Version::is_netburst(void) {
2553   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2554 }
2555 
2556 bool VM_Version::supports_tscinv_ext(void) {
2557   if (!supports_tscinv_bit()) {
2558     return false;
2559   }
2560 
2561   if (is_intel()) {
2562     return true;
2563   }
2564 
2565   if (is_amd()) {
2566     return !is_amd_Barcelona();
2567   }
2568 
2569   if (is_hygon()) {
2570     return true;
2571   }
2572 
2573   return false;
2574 }
2575 
2576 void VM_Version::resolve_cpu_information_details(void) {
2577 
  // In the future we want to base this information on proper cpu
  // and cache topology enumeration, such as
  // Intel 64 Architecture Processor Topology Enumeration,
  // which supports system cpu and cache topology enumeration
  // using either x2APIC IDs or initial APIC IDs.

  // For now these are only rough cpu information estimates,
  // which will not necessarily reflect the exact configuration of the system.
2586 
2587   // this is the number of logical hardware threads
2588   // visible to the operating system
2589   _no_of_threads = os::processor_count();
2590 
2591   // find out number of threads per cpu package
2592   int threads_per_package = threads_per_core() * cores_per_cpu();
2593 
2594   // use amount of threads visible to the process in order to guess number of sockets
2595   _no_of_sockets = _no_of_threads / threads_per_package;
2596 
  // The process might only see a subset of the total number of threads from
  // a single processor package, e.g. due to virtualization or resource
  // management. If so, just report a single package.
2600   if (0 == _no_of_sockets) {
2601     _no_of_sockets = 1;
2602   }
2603 
2604   // estimate the number of cores
2605   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2606 }
2607 
2608 
2609 const char* VM_Version::cpu_family_description(void) {
2610   int cpu_family_id = extended_cpu_family();
2611   if (is_amd()) {
2612     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2613       return _family_id_amd[cpu_family_id];
2614     }
2615   }
2616   if (is_intel()) {
2617     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2618       return cpu_model_description();
2619     }
2620     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2621       return _family_id_intel[cpu_family_id];
2622     }
2623   }
2624   if (is_hygon()) {
2625     return "Dhyana";
2626   }
2627   return "Unknown x86";
2628 }
2629 
2630 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2631   assert(buf != nullptr, "buffer is null!");
2632   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2633 
2634   const char* cpu_type = nullptr;
2635   const char* x64 = nullptr;
2636 
2637   if (is_intel()) {
2638     cpu_type = "Intel";
2639     x64 = cpu_is_em64t() ? " Intel64" : "";
2640   } else if (is_amd()) {
2641     cpu_type = "AMD";
2642     x64 = cpu_is_em64t() ? " AMD64" : "";
2643   } else if (is_hygon()) {
2644     cpu_type = "Hygon";
2645     x64 = cpu_is_em64t() ? " AMD64" : "";
2646   } else {
2647     cpu_type = "Unknown x86";
2648     x64 = cpu_is_em64t() ? " x86_64" : "";
2649   }
2650 
2651   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2652     cpu_type,
2653     cpu_family_description(),
2654     supports_ht() ? " (HT)" : "",
2655     supports_sse3() ? " SSE3" : "",
2656     supports_ssse3() ? " SSSE3" : "",
2657     supports_sse4_1() ? " SSE4.1" : "",
2658     supports_sse4_2() ? " SSE4.2" : "",
2659     supports_sse4a() ? " SSE4A" : "",
2660     is_netburst() ? " Netburst" : "",
2661     is_intel_family_core() ? " Core" : "",
2662     x64);
2663 
2664   return OS_OK;
2665 }
2666 
2667 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2668   assert(buf != nullptr, "buffer is null!");
2669   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2670   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2671 
2672   // invoke newly generated asm code to fetch CPU Brand String
2673   getCPUIDBrandString_stub(&_cpuid_info);
2674 
2675   // fetch results into buffer
2676   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2677   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2678   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2679   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2680   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2681   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2682   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2683   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2684   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2685   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2686   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2687   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2688 
2689   return OS_OK;
2690 }
2691 
2692 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2693   guarantee(buf != nullptr, "buffer is null!");
2694   guarantee(buf_len > 0, "buffer len not enough!");
2695 
2696   unsigned int flag = 0;
2697   unsigned int fi = 0;
2698   size_t       written = 0;
2699   const char*  prefix = "";
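
// Append one feature name, separating entries with ", " after the first.
// jio_snprintf returns a negative value on overflow, in which case the
// buffer is reported as full (buf_len - 1 characters written).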
2700 
2701 #define WRITE_TO_BUF(string)                                                          \
2702   {                                                                                   \
2703     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2704     if (res < 0) {                                                                    \
2705       return buf_len - 1;                                                             \
2706     }                                                                                 \
2707     written += res;                                                                   \
2708     if (prefix[0] == '\0') {                                                          \
2709       prefix = ", ";                                                                  \
2710     }                                                                                 \
2711   }
2712 
2713   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2714     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2715       continue; /* no hyperthreading */
2716     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2717       continue; /* no fast system call */
2718     }
2719     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2720       WRITE_TO_BUF(_feature_edx_id[fi]);
2721     }
2722   }
2723 
2724   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2725     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2726       WRITE_TO_BUF(_feature_ecx_id[fi]);
2727     }
2728   }
2729 
2730   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2731     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2732       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2733     }
2734   }
2735 
2736   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2737     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2738       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2739     }
2740   }
2741 
2742   if (supports_tscinv_bit()) {
2743       WRITE_TO_BUF("Invariant TSC");
2744   }
2745 
2746   return written;
2747 }
2748 
2749 /**
2750  * Write a detailed description of the cpu to a given buffer, including
2751  * feature set.
2752  */
2753 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2754   assert(buf != nullptr, "buffer is null!");
2755   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2756 
2757   static const char* unknown = "<unknown>";
2758   char               vendor_id[VENDOR_LENGTH];
2759   const char*        family = nullptr;
2760   const char*        model = nullptr;
2761   const char*        brand = nullptr;
2762   int                outputLen = 0;
2763 
2764   family = cpu_family_description();
2765   if (family == nullptr) {
2766     family = unknown;
2767   }
2768 
2769   model = cpu_model_description();
2770   if (model == nullptr) {
2771     model = unknown;
2772   }
2773 
2774   brand = cpu_brand_string();
2775 
2776   if (brand == nullptr) {
2777     brand = cpu_brand();
2778     if (brand == nullptr) {
2779       brand = unknown;
2780     }
2781   }
2782 
2783   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2784   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2785   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2786   vendor_id[VENDOR_LENGTH-1] = '\0';
2787 
2788   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2789     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2790     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2791     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2792     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2793     "Supports: ",
2794     brand,
2795     vendor_id,
2796     family,
2797     extended_cpu_family(),
2798     model,
2799     extended_cpu_model(),
2800     cpu_stepping(),
2801     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2802     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2803     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2804     _cpuid_info.std_cpuid1_eax.value,
2805     _cpuid_info.std_cpuid1_ebx.value,
2806     _cpuid_info.std_cpuid1_ecx.value,
2807     _cpuid_info.std_cpuid1_edx.value,
2808     _cpuid_info.ext_cpuid1_eax,
2809     _cpuid_info.ext_cpuid1_ebx,
2810     _cpuid_info.ext_cpuid1_ecx,
2811     _cpuid_info.ext_cpuid1_edx);
2812 
2813   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2814     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2815     return OS_ERR;
2816   }
2817 
2818   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2819 
2820   return OS_OK;
2821 }
2822 
2823 
2824 // Fill in Abstract_VM_Version statics
2825 void VM_Version::initialize_cpu_information() {
2826   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2827   assert(!_initialized, "shouldn't be initialized yet");
2828   resolve_cpu_information_details();
2829 
2830   // initialize cpu_name and cpu_desc
2831   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2832   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2833   _initialized = true;
2834 }
2835 
2836 /**
2837  *  For information about extracting the frequency from the cpu brand string, please see:
2838  *
2839  *    Intel Processor Identification and the CPUID Instruction
2840  *    Application Note 485
2841  *    May 2012
2842  *
2843  * The return value is the frequency in Hz.
2844  */
2845 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2846   const char* const brand_string = cpu_brand_string();
2847   if (brand_string == nullptr) {
2848     return 0;
2849   }
2850   const int64_t MEGA = 1000000;
2851   int64_t multiplier = 0;
2852   int64_t frequency = 0;
2853   uint8_t idx = 0;
2854   // The brand string buffer is at most 48 bytes.
2855   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2856   for (; idx < 48-2; ++idx) {
2857     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2858     // Search brand string for "yHz" where y is M, G, or T.
2859     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2860       if (brand_string[idx] == 'M') {
2861         multiplier = MEGA;
2862       } else if (brand_string[idx] == 'G') {
2863         multiplier = MEGA * 1000;
2864       } else if (brand_string[idx] == 'T') {
2865         multiplier = MEGA * MEGA;
2866       }
2867       break;
2868     }
2869   }
2870   if (multiplier > 0) {
2871     // Compute frequency (in Hz) from brand string.
2872     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2873       frequency =  (brand_string[idx-4] - '0') * multiplier;
2874       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2875       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2876     } else { // format is "xxxx"
2877       frequency =  (brand_string[idx-4] - '0') * 1000;
2878       frequency += (brand_string[idx-3] - '0') * 100;
2879       frequency += (brand_string[idx-2] - '0') * 10;
2880       frequency += (brand_string[idx-1] - '0');
2881       frequency *= multiplier;
2882     }
2883   }
2884   return frequency;
2885 }
2886 
2887 
2888 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2889   if (_max_qualified_cpu_frequency == 0) {
2890     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2891   }
2892   return _max_qualified_cpu_frequency;
2893 }
2894 
2895 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2896   VM_Features vm_features;
2897   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2898     vm_features.set_feature(CPU_CX8);
2899   if (std_cpuid1_edx.bits.cmov != 0)
2900     vm_features.set_feature(CPU_CMOV);
2901   if (std_cpuid1_edx.bits.clflush != 0)
2902     vm_features.set_feature(CPU_FLUSH);
2903   // clflush should always be available on x86_64
  // if not, we are in real trouble because we rely on it
2905   // to flush the code cache.
2906   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2907   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2908       ext_cpuid1_edx.bits.fxsr != 0))
2909     vm_features.set_feature(CPU_FXSR);
2910   // HT flag is set for multi-core processors also.
2911   if (threads_per_core() > 1)
2912     vm_features.set_feature(CPU_HT);
2913   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2914       ext_cpuid1_edx.bits.mmx != 0))
2915     vm_features.set_feature(CPU_MMX);
2916   if (std_cpuid1_edx.bits.sse != 0)
2917     vm_features.set_feature(CPU_SSE);
2918   if (std_cpuid1_edx.bits.sse2 != 0)
2919     vm_features.set_feature(CPU_SSE2);
2920   if (std_cpuid1_ecx.bits.sse3 != 0)
2921     vm_features.set_feature(CPU_SSE3);
2922   if (std_cpuid1_ecx.bits.ssse3 != 0)
2923     vm_features.set_feature(CPU_SSSE3);
2924   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2925     vm_features.set_feature(CPU_SSE4_1);
2926   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2927     vm_features.set_feature(CPU_SSE4_2);
2928   if (std_cpuid1_ecx.bits.popcnt != 0)
2929     vm_features.set_feature(CPU_POPCNT);
2930   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2931       xem_xcr0_eax.bits.apx_f != 0 &&
2932       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2933     vm_features.set_feature(CPU_APX_F);
2934   }
2935   if (std_cpuid1_ecx.bits.avx != 0 &&
2936       std_cpuid1_ecx.bits.osxsave != 0 &&
2937       xem_xcr0_eax.bits.sse != 0 &&
2938       xem_xcr0_eax.bits.ymm != 0) {
2939     vm_features.set_feature(CPU_AVX);
2940     vm_features.set_feature(CPU_VZEROUPPER);
2941     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2942       vm_features.set_feature(CPU_SHA512);
2943     if (std_cpuid1_ecx.bits.f16c != 0)
2944       vm_features.set_feature(CPU_F16C);
2945     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2946       vm_features.set_feature(CPU_AVX2);
2947       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2948         vm_features.set_feature(CPU_AVX_IFMA);
2949     }
2950     if (sef_cpuid7_ecx.bits.gfni != 0)
2951         vm_features.set_feature(CPU_GFNI);
2952     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2953         xem_xcr0_eax.bits.opmask != 0 &&
2954         xem_xcr0_eax.bits.zmm512 != 0 &&
2955         xem_xcr0_eax.bits.zmm32 != 0) {
2956       vm_features.set_feature(CPU_AVX512F);
2957       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2958         vm_features.set_feature(CPU_AVX512CD);
2959       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2960         vm_features.set_feature(CPU_AVX512DQ);
2961       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2962         vm_features.set_feature(CPU_AVX512_IFMA);
2963       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2964         vm_features.set_feature(CPU_AVX512PF);
2965       if (sef_cpuid7_ebx.bits.avx512er != 0)
2966         vm_features.set_feature(CPU_AVX512ER);
2967       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2968         vm_features.set_feature(CPU_AVX512BW);
2969       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2970         vm_features.set_feature(CPU_AVX512VL);
2971       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2972         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2973       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2974         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2975       if (sef_cpuid7_ecx.bits.vaes != 0)
2976         vm_features.set_feature(CPU_AVX512_VAES);
2977       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2978         vm_features.set_feature(CPU_AVX512_VNNI);
2979       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2980         vm_features.set_feature(CPU_AVX512_BITALG);
2981       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2982         vm_features.set_feature(CPU_AVX512_VBMI);
2983       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2984         vm_features.set_feature(CPU_AVX512_VBMI2);
2985     }
2986     if (is_intel()) {
2987       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2988           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2989           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2990           xem_xcr0_eax.bits.opmask != 0 &&
2991           xem_xcr0_eax.bits.zmm512 != 0 &&
2992           xem_xcr0_eax.bits.zmm32 != 0) {
2993         vm_features.set_feature(CPU_AVX10_1);
2994         vm_features.set_feature(CPU_AVX512F);
2995         vm_features.set_feature(CPU_AVX512CD);
2996         vm_features.set_feature(CPU_AVX512DQ);
2997         vm_features.set_feature(CPU_AVX512PF);
2998         vm_features.set_feature(CPU_AVX512ER);
2999         vm_features.set_feature(CPU_AVX512BW);
3000         vm_features.set_feature(CPU_AVX512VL);
3001         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3002         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3003         vm_features.set_feature(CPU_AVX512_VAES);
3004         vm_features.set_feature(CPU_AVX512_VNNI);
3005         vm_features.set_feature(CPU_AVX512_BITALG);
3006         vm_features.set_feature(CPU_AVX512_VBMI);
3007         vm_features.set_feature(CPU_AVX512_VBMI2);
3008         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3009           vm_features.set_feature(CPU_AVX10_2);
3010         }
3011       }
3012     }
3013   }
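
  // Note: the xem_xcr0_eax checks above encode the usual OS-support handshake:
  // an extension is only usable if the OS has enabled the matching XSAVE state
  // components in XCR0. As a rough sketch of what those bits mean (the actual
  // probing is done by the generated get_cpu_info stub, not by inline code):
  //
  //   uint32_t lo, hi;
  //   __asm__ volatile("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0)); // needs OSXSAVE
  //   uint64_t xcr0 = ((uint64_t)hi << 32) | lo;
  //   bool os_avx    = (xcr0 & 0x06) == 0x06; // XMM (bit 1) + YMM (bit 2)
  //   bool os_avx512 = (xcr0 & 0xE6) == 0xE6; // + opmask/ZMM_Hi256/Hi16_ZMM (bits 5-7)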

  if (std_cpuid1_ecx.bits.hv != 0)
    vm_features.set_feature(CPU_HV);
  if (sef_cpuid7_ebx.bits.bmi1 != 0)
    vm_features.set_feature(CPU_BMI1);
  if (std_cpuid1_edx.bits.tsc != 0)
    vm_features.set_feature(CPU_TSC);
  if (ext_cpuid7_edx.bits.tsc_invariance != 0)
    vm_features.set_feature(CPU_TSCINV_BIT);
  if (std_cpuid1_ecx.bits.aes != 0)
    vm_features.set_feature(CPU_AES);
  if (ext_cpuid1_ecx.bits.lzcnt != 0)
    vm_features.set_feature(CPU_LZCNT);
  if (ext_cpuid1_ecx.bits.prefetchw != 0)
    vm_features.set_feature(CPU_3DNOW_PREFETCH);
  if (sef_cpuid7_ebx.bits.erms != 0)
    vm_features.set_feature(CPU_ERMS);
  if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    vm_features.set_feature(CPU_FSRM);
  if (std_cpuid1_ecx.bits.clmul != 0)
    vm_features.set_feature(CPU_CLMUL);
  if (sef_cpuid7_ebx.bits.rtm != 0)
    vm_features.set_feature(CPU_RTM);
  if (sef_cpuid7_ebx.bits.adx != 0)
    vm_features.set_feature(CPU_ADX);
  if (sef_cpuid7_ebx.bits.bmi2 != 0)
    vm_features.set_feature(CPU_BMI2);
  if (sef_cpuid7_ebx.bits.sha != 0)
    vm_features.set_feature(CPU_SHA);
  if (std_cpuid1_ecx.bits.fma != 0)
    vm_features.set_feature(CPU_FMA);
  if (sef_cpuid7_ebx.bits.clflushopt != 0)
    vm_features.set_feature(CPU_FLUSHOPT);
  if (sef_cpuid7_ebx.bits.clwb != 0)
    vm_features.set_feature(CPU_CLWB);
  if (ext_cpuid1_edx.bits.rdtscp != 0)
    vm_features.set_feature(CPU_RDTSCP);
  if (sef_cpuid7_ecx.bits.rdpid != 0)
    vm_features.set_feature(CPU_RDPID);

  // AMD|Hygon additional features.
  if (is_amd_family()) {
    // PREFETCHW was checked above; check 3DNow (tdnow) here.
    if (ext_cpuid1_edx.bits.tdnow != 0)
      vm_features.set_feature(CPU_3DNOW_PREFETCH);
    if (ext_cpuid1_ecx.bits.sse4a != 0)
      vm_features.set_feature(CPU_SSE4A);
  }

  // Intel additional features.
  if (is_intel()) {
    if (sef_cpuid7_edx.bits.serialize != 0)
      vm_features.set_feature(CPU_SERIALIZE);
    if (sef_cpuid7_edx.bits.hybrid != 0)
      vm_features.set_feature(CPU_HYBRID);
    if (sef_cpuid7_edx.bits.avx512_fp16 != 0)
      vm_features.set_feature(CPU_AVX512_FP16);
  }

  // ZX additional features.
  if (is_zx()) {
    // We do not know if these are supported by ZX, so we cannot trust the
    // common CPUID bit for them.
    assert(!vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
    vm_features.clear_feature(CPU_CLWB);
  }

  // Protection key features.
  if (sef_cpuid7_ecx.bits.pku != 0) {
    vm_features.set_feature(CPU_PKU);
  }
  if (sef_cpuid7_ecx.bits.ospke != 0) {
    vm_features.set_feature(CPU_OSPKE);
  }

  // Control flow enforcement (CET) features.
  if (sef_cpuid7_ecx.bits.cet_ss != 0) {
    vm_features.set_feature(CPU_CET_SS);
  }
  if (sef_cpuid7_edx.bits.cet_ibt != 0) {
    vm_features.set_feature(CPU_CET_IBT);
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    vm_features.set_feature(CPU_TSCINV);
  }
  return vm_features;
}

bool VM_Version::os_supports_avx_vectors() {
  bool retVal = false;
  int nreg = 4;
  if (supports_evex()) {
    // Verify that the OS saves and restores all bits of the EVEX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
      if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
  } else if (supports_avx()) {
    // Verify that the OS saves and restores all bits of the AVX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
    // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen.
    if (retVal == false) {
      // Verify that the OS saves and restores all bits of the EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    }
  }
  return retVal;
}
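
// Note: this check relies on the get_cpu_info stub having loaded
// ymm_test_value() into the vector registers and forced a trip through OS
// signal handling before storing them to ymm_save/zmm_save; any lane that
// comes back changed means the OS did not preserve the full register state,
// so the wider vectors must not be used.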

bool VM_Version::os_supports_apx_egprs() {
  if (!supports_apx_f()) {
    return false;
  }
  if (_cpuid_info.apx_save[0] != egpr_test_value() ||
      _cpuid_info.apx_save[1] != egpr_test_value()) {
    return false;
  }
  return true;
}

uint VM_Version::cores_per_cpu() {
  uint result = 1;
  if (is_intel()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  } else if (is_amd_family()) {
    result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
    if (cpu_family() >= 0x17) { // Zen or later
      result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    }
  } else if (is_zx()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  }
  return result;
}
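
// Worked example (hypothetical values): on an Intel part where topology leaf
// 0xB reports 16 logical processors at the core level (tpl_cpuidB1) and
// 2 logical processors at the SMT level (tpl_cpuidB0), this computes
// 16 / 2 = 8 physical cores per package.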

uint VM_Version::threads_per_core() {
  uint result = 1;
  if (is_intel() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (is_zx() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
    if (cpu_family() >= 0x17) {
      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    } else {
      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
               cores_per_cpu();
    }
  }
  return (result == 0 ? 1 : result);
}
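
// Example (hypothetical): an AMD Zen CPU (family >= 0x17) reporting
// ext_cpuid1E_ebx.bits.threads_per_core == 1 yields 1 + 1 = 2 threads per
// core, i.e. SMT is enabled.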

uint VM_Version::L1_line_size() {
  uint result = 0;
  if (is_intel()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  } else if (is_amd_family()) {
    result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
  } else if (is_zx()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  }
  if (result < 32) // not reported?
    result = 32;   // default to 32 bytes on x86/x64 when not reported
  return result;
}
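
// Example (hypothetical values): Intel encodes the coherency line size minus
// one in CPUID leaf 4, so dcp_cpuid4_ebx.bits.L1_line_size == 63 decodes to a
// 64-byte cache line, while AMD's leaf 0x80000005 reports the line size in
// bytes directly.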

bool VM_Version::is_intel_tsc_synched_at_init() {
  if (is_intel_family_core()) {
    uint32_t ext_model = extended_cpu_model();
    if (ext_model == CPU_MODEL_NEHALEM_EP     ||
        ext_model == CPU_MODEL_WESTMERE_EP    ||
        ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
        ext_model == CPU_MODEL_IVYBRIDGE_EP) {
      // These EP models have invariant TSC support on <= 2-socket systems.
      // EX versions are usually used in > 2-socket systems and likely don't
      // synchronize TSCs at initialization.
      // Code that uses TSC values must be prepared for them to arbitrarily
      // jump forward or backward.
      return true;
    }
  }
  return false;
}

int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
  // Hardware prefetching (distance/size in bytes):
  // Pentium 3 -  64 /  32
  // Pentium 4 - 256 / 128
  // Athlon    -  64 /  32 ????
  // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
  // Core      - 128 /  64
  //
  // Software prefetching (distance in bytes / instruction with best score):
  // Pentium 3 - 128 / prefetchnta
  // Pentium 4 - 512 / prefetchnta
  // Athlon    - 128 / prefetchnta
  // Opteron   - 256 / prefetchnta
  // Core      - 256 / prefetchnta
  // The distance is used only when AllocatePrefetchStyle > 0.

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    if (supports_sse3() && is_intel_server_family()) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
        return 384;
      }
    }
    if (supports_sse2()) {
      if (is_intel_server_family()) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}
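
// Example (illustrative): with AllocatePrefetchStyle > 0 and a distance of
// 256, the JIT compilers emit a prefetch roughly like
//
//   prefetchnta [allocation_top + 256]
//
// ahead of object initialization, so the cache lines about to be written are
// already in flight when the stores execute.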

bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
  switch (id) {
  case vmIntrinsics::_floatToFloat16:
  case vmIntrinsics::_float16ToFloat:
    if (!supports_float16()) {
      return false;
    }
    break;
  default:
    break;
  }
  return true;
}

void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
  int i = 0;
  ss.join([&]() {
    // Yield the name of each feature in the passed-in set, in Feature_Flag
    // order; returning nullptr ends the join.
    while (i < MAX_CPU_FEATURES) {
      if (features.supports_feature((VM_Version::Feature_Flag)i)) {
        return _features_names[i++];
      }
      i += 1;
    }
    return (const char*)nullptr;
  }, ", ");
}
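
// Example output (hypothetical feature set): the joined string looks like
// "cx8, cmov, fxsr, ht, mmx, sse, sse2", with names taken from
// _features_names in Feature_Flag order and separated by ", ".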