1 /*
   2  * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
// Basic processor identification (family/model/stepping style fields),
// set during feature detection — presumably in get_processor_features();
// TODO confirm against the rest of this file.
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID/XCR0 output, filled in by the get_cpu_info stub generated below.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Printable name for each CPU feature, generated from the CPU_FEATURE_FLAGS
// x-macro so this table stays in sync with the feature flag list.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code blob holding the stubs emitted by VM_Version_StubGenerator below;
// stub_size must be large enough for all of them combined.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// Size of the feature bitmap, in 64-bit words.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

// _features: the feature set the VM operates with; _cpu_features: presumably
// the unmodified hardware-detected set — TODO confirm where each is assigned.
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// C-callable signatures of the generated stubs.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}
// Entry points of the generated stubs; populated once the stubs are emitted.
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
  82 
  83 bool VM_Version::supports_clflush() {
  84   // clflush should always be available on x86_64
  85   // if not we are in real trouble because we rely on it
  86   // to flush the code cache.
  87   // Unfortunately, Assembler::clflush is currently called as part
  88   // of generation of the code cache flush routine. This happens
  89   // under Universe::init before the processor features are set
  90   // up. Assembler::flush calls this routine to check that clflush
  91   // is allowed. So, we give the caller a free pass if Universe init
  92   // is still in progress.
  93   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  94   return true;
  95 }
  96 
// Standard CPUID leaf (function) numbers used by the stubs below.
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended CPUID leaf numbers; 0x80000002..0x80000004 hold the 48-byte
// processor brand string.
#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
 109 
// One-shot stub generator: emits the small assembly routines used during VM
// startup to interrogate the processor (CPUID leaves, XCR0, brand string)
// and to probe — via deliberate SEGVs — whether the OS correctly restores
// extended register state (YMM/ZMM, APX EGPRs) across signal handling.
// Each stub writes its results into a VM_Version::CpuidInfo record.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Returns a stub that zeroes the APX extended GPRs r16 and r31.  Run
  // before the APX SEGV probe in get_cpu_info so that the test values
  // observed after the signal can only have been preserved by the OS
  // signal handling, not left over from before.
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    // Temporarily advertise APX so the assembler accepts EGPR operands.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Returns the get_cpu_info stub:
  //   void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
  // Fills *cpuid_info with the raw output of the relevant CPUID leaves and
  // XCR0, then performs the signal-based save/restore probes for APX and
  // AVX/EVEX register state.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())))
;
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX SEGV probe: load test values into EGPRs, fault on a null read,
    // and after the signal handler returns record what survived in r16/r31.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    //
    // Query CPUID 0xD.19 for APX XSAVE offset
    // Extended State Enumeration Sub-leaf 19 (APX)
    // EAX = size of APX state (should be 128)
    // EBX = offset in standard XSAVE format
    //
    __ movl(rax, 0xD);
    __ movl(rcx, 19);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
    __ movl(Address(rsi, 0), rbx);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    // Vector SEGV probe: fault on a null read, then save the vector
    // registers so the caller can verify the OS restored their upper bits.
    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits a vzeroupper, guarded so that it only executes on GenuineIntel
  // parts that are not Knights-family Xeon Phi; otherwise control jumps
  // to L_wrapup and the instruction is skipped.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Returns the detect_virt stub:
  //   void detect_virt(uint32_t leaf, uint32_t* regs);
  // Executes CPUID for the given leaf and stores eax..edx into regs[0..3];
  // used to query hypervisor identification leaves.  Note: rcx is not
  // zeroed, so sub-leaf-sensitive leaves see whatever rcx holds.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Returns the getCPUIDBrandString stub:
  //   void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
  // Copies the 48-byte processor brand string (CPUID leaves
  // 0x80000002..0x80000004) into the proc_name_* fields of *cpuid_info,
  // after the same 386/486 capability probing as get_cpu_info.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 874 
 875 void VM_Version::get_processor_features() {
 876 
 877   _cpu = 4; // 486 by default
 878   _model = 0;
 879   _stepping = 0;
 880   _logical_processors_per_package = 1;
 881   // i486 internal cache is both I&D and has a 16-byte line size
 882   _L1_data_cache_line_size = 16;
 883 
 884   // Get raw processor info
 885 
 886   get_cpu_info_stub(&_cpuid_info);
 887 
 888   assert_is_initialized();
 889   _cpu = extended_cpu_family();
 890   _model = extended_cpu_model();
 891   _stepping = cpu_stepping();
 892 
 893   if (cpu_family() > 4) { // it supports CPUID
 894     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 895     _cpu_features = _features; // Preserve features
 896     // Logical processors are only available on P4s and above,
 897     // and only if hyperthreading is available.
 898     _logical_processors_per_package = logical_processor_count();
 899     _L1_data_cache_line_size = L1_line_size();
 900   }
 901 
 902   // xchg and xadd instructions
 903   _supports_atomic_getset4 = true;
 904   _supports_atomic_getadd4 = true;
 905   _supports_atomic_getset8 = true;
 906   _supports_atomic_getadd8 = true;
 907 
 908   // OS should support SSE for x64 and hardware should support at least SSE2.
 909   if (!VM_Version::supports_sse2()) {
 910     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 911   }
 912   // in 64 bit the use of SSE2 is the minimum
 913   if (UseSSE < 2) UseSSE = 2;
 914 
 915   // flush_icache_stub have to be generated first.
 916   // That is why Icache line size is hard coded in ICache class,
 917   // see icache_x86.hpp. It is also the reason why we can't use
 918   // clflush instruction in 32-bit VM since it could be running
 919   // on CPU which does not support it.
 920   //
 921   // The only thing we can do is to verify that flushed
 922   // ICache::line_size has correct value.
 923   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 924   // clflush_size is size in quadwords (8 bytes).
 925   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 926 
 927   // assigning this field effectively enables Unsafe.writebackMemory()
 928   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 929   // that is only implemented on x86_64 and only if the OS plays ball
 930   if (os::supports_map_sync()) {
 931     // publish data cache line flush size to generic field, otherwise
 932     // let if default to zero thereby disabling writeback
 933     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 934   }
 935 
 936   // Check if processor has Intel Ecore
 937   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 938     (supports_hybrid() ||
 939      _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
 940      _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
 941     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 942   }
 943 
 944   if (UseSSE < 4) {
 945     _features.clear_feature(CPU_SSE4_1);
 946     _features.clear_feature(CPU_SSE4_2);
 947   }
 948 
 949   if (UseSSE < 3) {
 950     _features.clear_feature(CPU_SSE3);
 951     _features.clear_feature(CPU_SSSE3);
 952     _features.clear_feature(CPU_SSE4A);
 953   }
 954 
 955   if (UseSSE < 2)
 956     _features.clear_feature(CPU_SSE2);
 957 
 958   if (UseSSE < 1)
 959     _features.clear_feature(CPU_SSE);
 960 
 961   // ZX cpus specific settings
 962   if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
 963     if (cpu_family() == 7) {
 964       if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
 965         UseAVX = 1;
 966       } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
 967         UseAVX = 0;
 968       }
 969     } else if (cpu_family() == 6) {
 970       UseAVX = 0;
 971     }
 972   }
 973 
 974   // UseSSE is set to the smaller of what hardware supports and what
 975   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 976   // older Pentiums which do not support it.
 977   int use_sse_limit = 0;
 978   if (UseSSE > 0) {
 979     if (UseSSE > 3 && supports_sse4_1()) {
 980       use_sse_limit = 4;
 981     } else if (UseSSE > 2 && supports_sse3()) {
 982       use_sse_limit = 3;
 983     } else if (UseSSE > 1 && supports_sse2()) {
 984       use_sse_limit = 2;
 985     } else if (UseSSE > 0 && supports_sse()) {
 986       use_sse_limit = 1;
 987     } else {
 988       use_sse_limit = 0;
 989     }
 990   }
 991   if (FLAG_IS_DEFAULT(UseSSE)) {
 992     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 993   } else if (UseSSE > use_sse_limit) {
 994     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 995     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 996   }
 997 
 998   // first try initial setting and detect what we can support
 999   int use_avx_limit = 0;
1000   if (UseAVX > 0) {
1001     if (UseSSE < 4) {
1002       // Don't use AVX if SSE is unavailable or has been disabled.
1003       use_avx_limit = 0;
1004     } else if (UseAVX > 2 && supports_evex()) {
1005       use_avx_limit = 3;
1006     } else if (UseAVX > 1 && supports_avx2()) {
1007       use_avx_limit = 2;
1008     } else if (UseAVX > 0 && supports_avx()) {
1009       use_avx_limit = 1;
1010     } else {
1011       use_avx_limit = 0;
1012     }
1013   }
1014   if (FLAG_IS_DEFAULT(UseAVX)) {
1015     // Don't use AVX-512 on older Skylakes unless explicitly requested.
1016     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1017       FLAG_SET_DEFAULT(UseAVX, 2);
1018     } else {
1019       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1020     }
1021   }
1022 
1023   if (UseAVX > use_avx_limit) {
1024     if (UseSSE < 4) {
1025       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1026     } else {
1027       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1028     }
1029     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1030   }
1031 
1032   if (UseAVX < 3) {
1033     _features.clear_feature(CPU_AVX512F);
1034     _features.clear_feature(CPU_AVX512DQ);
1035     _features.clear_feature(CPU_AVX512CD);
1036     _features.clear_feature(CPU_AVX512BW);
1037     _features.clear_feature(CPU_AVX512ER);
1038     _features.clear_feature(CPU_AVX512PF);
1039     _features.clear_feature(CPU_AVX512VL);
1040     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1041     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1042     _features.clear_feature(CPU_AVX512_VAES);
1043     _features.clear_feature(CPU_AVX512_VNNI);
1044     _features.clear_feature(CPU_AVX512_VBMI);
1045     _features.clear_feature(CPU_AVX512_VBMI2);
1046     _features.clear_feature(CPU_AVX512_BITALG);
1047     _features.clear_feature(CPU_AVX512_IFMA);
1048     _features.clear_feature(CPU_APX_F);
1049     _features.clear_feature(CPU_AVX512_FP16);
1050     _features.clear_feature(CPU_AVX10_1);
1051     _features.clear_feature(CPU_AVX10_2);
1052   }
1053 
1054 
1055   if (UseAVX < 2) {
1056     _features.clear_feature(CPU_AVX2);
1057     _features.clear_feature(CPU_AVX_IFMA);
1058   }
1059 
1060   if (UseAVX < 1) {
1061     _features.clear_feature(CPU_AVX);
1062     _features.clear_feature(CPU_VZEROUPPER);
1063     _features.clear_feature(CPU_F16C);
1064     _features.clear_feature(CPU_SHA512);
1065   }
1066 
1067   if (logical_processors_per_package() == 1) {
1068     // HT processor could be installed on a system which doesn't support HT.
1069     _features.clear_feature(CPU_HT);
1070   }
1071 
1072   if (is_intel()) { // Intel cpus specific settings
1073     if (is_knights_family()) {
1074       _features.clear_feature(CPU_VZEROUPPER);
1075       _features.clear_feature(CPU_AVX512BW);
1076       _features.clear_feature(CPU_AVX512VL);
1077       _features.clear_feature(CPU_APX_F);
1078       _features.clear_feature(CPU_AVX512DQ);
1079       _features.clear_feature(CPU_AVX512_VNNI);
1080       _features.clear_feature(CPU_AVX512_VAES);
1081       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1082       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1083       _features.clear_feature(CPU_AVX512_VBMI);
1084       _features.clear_feature(CPU_AVX512_VBMI2);
1085       _features.clear_feature(CPU_CLWB);
1086       _features.clear_feature(CPU_FLUSHOPT);
1087       _features.clear_feature(CPU_GFNI);
1088       _features.clear_feature(CPU_AVX512_BITALG);
1089       _features.clear_feature(CPU_AVX512_IFMA);
1090       _features.clear_feature(CPU_AVX_IFMA);
1091       _features.clear_feature(CPU_AVX512_FP16);
1092       _features.clear_feature(CPU_AVX10_1);
1093       _features.clear_feature(CPU_AVX10_2);
1094     }
1095   }
1096 
1097   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1098   if (supports_apx_f() && os_supports_apx_egprs() && supports_avx512vl()) {
1099     if (FLAG_IS_DEFAULT(UseAPX)) {
1100       UseAPX = false; // by default UseAPX is false
1101       _features.clear_feature(CPU_APX_F);
1102     } else if (!UseAPX) {
1103       _features.clear_feature(CPU_APX_F);
1104     }
1105   } else if (UseAPX) {
1106     if (!FLAG_IS_DEFAULT(UseAPX)) {
1107       warning("APX is not supported on this CPU, setting it to false)");
1108     }
1109     FLAG_SET_DEFAULT(UseAPX, false);
1110   }
1111 
1112   CHECK_CPU_FEATURE(supports_clmul, CLMUL);
1113   CHECK_CPU_FEATURE(supports_aes, AES);
1114   CHECK_CPU_FEATURE(supports_fma, FMA);
1115 
1116   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1117     if (FLAG_IS_DEFAULT(UseSHA)) {
1118       UseSHA = true;
1119     } else if (!UseSHA) {
1120       _features.clear_feature(CPU_SHA);
1121     }
1122   } else if (UseSHA) {
1123     if (!FLAG_IS_DEFAULT(UseSHA)) {
1124       warning("SHA instructions are not available on this CPU");
1125     }
1126     FLAG_SET_DEFAULT(UseSHA, false);
1127   }
1128 
1129   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1130     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1131     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1132   } else {
1133     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1134   }
1135 
1136   assert(supports_clflush(), "Always present");
1137   if (X86ICacheSync == -1) {
1138     // Auto-detect, choosing the best performant one that still flushes
1139     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1140     if (supports_clwb()) {
1141       FLAG_SET_ERGO(X86ICacheSync, 3);
1142     } else if (supports_clflushopt()) {
1143       FLAG_SET_ERGO(X86ICacheSync, 2);
1144     } else {
1145       FLAG_SET_ERGO(X86ICacheSync, 1);
1146     }
1147   } else {
1148     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1149       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1150     }
1151     if ((X86ICacheSync == 3) && !supports_clwb()) {
1152       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1153     }
1154     if ((X86ICacheSync == 5) && !supports_serialize()) {
1155       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1156     }
1157   }
1158 
1159   stringStream ss(2048);
1160   if (supports_hybrid()) {
1161     ss.print("(hybrid)");
1162   } else {
1163     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1164   }
1165   ss.print(" family %d model %d stepping %d microcode 0x%x",
1166            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1167   ss.print(", ");
1168   int features_offset = (int)ss.size();
1169   insert_features_names(_features, ss);
1170 
1171   _cpu_info_string = ss.as_string(true);
1172   _features_string = _cpu_info_string + features_offset;
1173 
1174   // Use AES instructions if available.
1175   if (supports_aes()) {
1176     if (supports_sse3()) {
1177       if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1178         FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1179       }
1180     } else if (UseAESIntrinsics) {
1181       // The AES intrinsic stubs require AES instruction support (of course)
1182       // but also require sse3 mode or higher for instructions it use.
1183       if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1184         warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1185       }
1186       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1187     }
1188     if (!UseAESIntrinsics) {
1189       if (UseAESCTRIntrinsics) {
1190         if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1191           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1192         }
1193         FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1194       }
1195     } else {
1196       if (supports_sse4_1()) {
1197         if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1198           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1199         }
1200       } else if (UseAESCTRIntrinsics) {
1201         // The AES-CTR intrinsic stubs require AES instruction support (of course)
1202         // but also require sse4.1 mode or higher for instructions it use.
1203         if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1204           warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1205         }
1206         FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1207       }
1208     }
1209   } else {
1210     if (!cpu_supports_aes()) {
1211       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1212         warning("AES intrinsics are not available on this CPU");
1213       }
1214       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1215       if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1216         warning("AES-CTR intrinsics are not available on this CPU");
1217       }
1218       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1219     } else if (!UseAES) {
1220       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1221         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1222       }
1223       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1224       if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1225         warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1226       }
1227       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1228     }
1229   }
1230 
1231   if (UseCLMUL && (UseSSE > 2)) {
1232     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1233       UseCRC32Intrinsics = true;
1234     }
1235   } else if (UseCRC32Intrinsics) {
1236     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1237       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1238     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1239   }
1240 
1241   if (supports_avx2()) {
1242     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1243       UseAdler32Intrinsics = true;
1244     }
1245   } else if (UseAdler32Intrinsics) {
1246     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1247       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1248     }
1249     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1250   }
1251 
1252   if (supports_sse4_2() && supports_clmul()) {
1253     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1254       UseCRC32CIntrinsics = true;
1255     }
1256   } else if (UseCRC32CIntrinsics) {
1257     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1258       warning("CRC32C intrinsics are not available on this CPU");
1259     }
1260     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1261   }
1262 
1263   // GHASH/GCM intrinsics
1264   if (UseCLMUL && (UseSSE > 2)) {
1265     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1266       UseGHASHIntrinsics = true;
1267     }
1268   } else if (UseGHASHIntrinsics) {
1269     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1270       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1271     }
1272     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1273   }
1274 
1275   // ChaCha20 Intrinsics
1276   // As long as the system supports AVX as a baseline we can do a
1277   // SIMD-enabled block function.  StubGenerator makes the determination
1278   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1279   // version.
1280   if (UseAVX >= 1) {
1281     if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1282       UseChaCha20Intrinsics = true;
1283     }
1284   } else if (UseChaCha20Intrinsics) {
1285     if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1286       warning("ChaCha20 intrinsic requires AVX instructions");
1287     }
1288     FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1289   }
1290 
1291   // Kyber Intrinsics
1292   // Currently we only have them for AVX512
1293   if (supports_evex() && supports_avx512bw()) {
1294     if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1295       UseKyberIntrinsics = true;
1296     }
1297   } else if (UseKyberIntrinsics) {
1298     if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1299       warning("Intrinsics for ML-KEM are not available on this CPU.");
1300     }
1301     FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1302   }
1303 
1304   // Dilithium Intrinsics
1305   if (UseAVX > 1) {
1306       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1307           UseDilithiumIntrinsics = true;
1308       }
1309   } else if (UseDilithiumIntrinsics) {
1310     if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1311       warning("Intrinsics for ML-DSA are not available on this CPU.");
1312     }
1313     FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1314   }
1315 
1316   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1317   if (UseAVX >= 2) {
1318     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1319       UseBASE64Intrinsics = true;
1320     }
1321   } else if (UseBASE64Intrinsics) {
1322     if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1323       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1324     }
1325     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1326   }
1327 
1328   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1329     UseMD5Intrinsics = true;
1330   }
1331 
1332   if (supports_sha() && supports_sse4_1() && UseSHA) {
1333     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1334       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1335     }
1336   } else if (UseSHA1Intrinsics) {
1337     if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1338       warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1339     }
1340     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1341   }
1342 
1343   if (supports_sse4_1() && UseSHA) {
1344     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1345       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1346     }
1347   } else if (UseSHA256Intrinsics) {
1348     if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1349       warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1350     }
1351     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1352   }
1353 
1354   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1355     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1356       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1357     }
1358   } else if (UseSHA512Intrinsics) {
1359     if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1360       warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1361     }
1362     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1363   }
1364 
1365   if (UseSHA && supports_evex() && supports_avx512bw()) {
1366     if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1367       FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1368     }
1369   } else if (UseSHA3Intrinsics) {
1370     if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1371       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1372     }
1373     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1374   }
1375 
1376 #if COMPILER2_OR_JVMCI
1377   int max_vector_size = 0;
1378   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1379     // 16 byte vectors (in XMM) are supported with SSE2+
1380     max_vector_size = 16;
1381   } else if (UseAVX == 1 || UseAVX == 2) {
1382     // 32 bytes vectors (in YMM) are only supported with AVX+
1383     max_vector_size = 32;
1384   } else if (UseAVX > 2) {
1385     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1386     max_vector_size = 64;
1387   }
1388 
1389   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1390 
1391   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1392     if (MaxVectorSize < min_vector_size) {
1393       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1394       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1395     }
1396     if (MaxVectorSize > max_vector_size) {
1397       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1398       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1399     }
1400     if (!is_power_of_2(MaxVectorSize)) {
1401       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1402       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1403     }
1404   } else {
1405     // If default, use highest supported configuration
1406     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1407   }
1408 
1409 #if defined(COMPILER2) && defined(ASSERT)
1410   if (MaxVectorSize > 0) {
1411     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1412       tty->print_cr("State of YMM registers after signal handle:");
1413       int nreg = 4;
1414       const char* ymm_name[4] = {"0", "7", "8", "15"};
1415       for (int i = 0; i < nreg; i++) {
1416         tty->print("YMM%s:", ymm_name[i]);
1417         for (int j = 7; j >=0; j--) {
1418           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1419         }
1420         tty->cr();
1421       }
1422     }
1423   }
1424 #endif // COMPILER2 && ASSERT
1425 
1426   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1427     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1428       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1429     }
1430   } else if (UsePoly1305Intrinsics) {
1431     if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1432       warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1433     }
1434     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1435   }
1436 
1437   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1438     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1439       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1440     }
1441   } else if (UseIntPolyIntrinsics) {
1442     if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1443       warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1444     }
1445     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1446   }
1447 
1448   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1449     UseMultiplyToLenIntrinsic = true;
1450   }
1451   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1452     UseSquareToLenIntrinsic = true;
1453   }
1454   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1455     UseMulAddIntrinsic = true;
1456   }
1457   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1458     UseMontgomeryMultiplyIntrinsic = true;
1459   }
1460   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1461     UseMontgomerySquareIntrinsic = true;
1462   }
1463 #endif // COMPILER2_OR_JVMCI
1464 
1465   // On new cpus instructions which update whole XMM register should be used
1466   // to prevent partial register stall due to dependencies on high half.
1467   //
1468   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1469   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1470   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1471   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1472 
1473 
1474   if (is_zx()) { // ZX cpus specific settings
1475     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1476       UseStoreImmI16 = false; // don't use it on ZX cpus
1477     }
1478     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1479       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1480         // Use it on all ZX cpus
1481         UseAddressNop = true;
1482       }
1483     }
1484     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1485       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1486     }
1487     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1488       if (supports_sse3()) {
1489         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1490       } else {
1491         UseXmmRegToRegMoveAll = false;
1492       }
1493     }
1494     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1495 #ifdef COMPILER2
1496       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1497         // For new ZX cpus do the next optimization:
1498         // don't align the beginning of a loop if there are enough instructions
1499         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1500         // in current fetch line (OptoLoopAlignment) or the padding
1501         // is big (> MaxLoopPad).
1502         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1503         // generated NOP instructions. 11 is the largest size of one
1504         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1505         MaxLoopPad = 11;
1506       }
1507 #endif // COMPILER2
1508       if (supports_sse4_2()) { // new ZX cpus
1509         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1510           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1511         }
1512       }
1513     }
1514 
1515     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1516       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1517     }
1518   }
1519 
1520   if (is_amd_family()) { // AMD cpus specific settings
1521     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1522       // Use it on new AMD cpus starting from Opteron.
1523       UseAddressNop = true;
1524     }
1525     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1526       if (supports_sse4a()) {
1527         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1528       } else {
1529         UseXmmLoadAndClearUpper = false;
1530       }
1531     }
1532     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1533       if (supports_sse4a()) {
1534         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1535       } else {
1536         UseXmmRegToRegMoveAll = false;
1537       }
1538     }
1539     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1540       if (supports_sse4a()) {
1541         UseXmmI2F = true;
1542       } else {
1543         UseXmmI2F = false;
1544       }
1545     }
1546     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1547       if (supports_sse4a()) {
1548         UseXmmI2D = true;
1549       } else {
1550         UseXmmI2D = false;
1551       }
1552     }
1553 
1554     // some defaults for AMD family 15h
1555     if (cpu_family() == 0x15) {
1556       // On family 15h processors default is no sw prefetch
1557       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1558         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1559       }
1560       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1561       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1562         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1563       }
1564       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1565         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1566       }
1567     }
1568 
1569 #ifdef COMPILER2
1570     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1571       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1572       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1573     }
1574 #endif // COMPILER2
1575 
1576     // Some defaults for AMD family >= 17h && Hygon family 18h
1577     if (cpu_family() >= 0x17) {
1578       // On family >=17h processors use XMM and UnalignedLoadStores
1579       // for Array Copy
1580       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1581         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1582       }
1583 #ifdef COMPILER2
1584       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1585         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1586       }
1587 #endif
1588     }
1589   }
1590 
1591   if (is_intel()) { // Intel cpus specific settings
1592     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1593       UseStoreImmI16 = false; // don't use it on Intel cpus
1594     }
1595     if (is_intel_server_family() || cpu_family() == 15) {
1596       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1597         // Use it on all Intel cpus starting from PentiumPro
1598         UseAddressNop = true;
1599       }
1600     }
1601     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1602       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1603     }
1604     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1605       if (supports_sse3()) {
1606         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1607       } else {
1608         UseXmmRegToRegMoveAll = false;
1609       }
1610     }
1611     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1612 #ifdef COMPILER2
1613       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1614         // For new Intel cpus do the next optimization:
1615         // don't align the beginning of a loop if there are enough instructions
1616         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1617         // in current fetch line (OptoLoopAlignment) or the padding
1618         // is big (> MaxLoopPad).
1619         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1620         // generated NOP instructions. 11 is the largest size of one
1621         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1622         MaxLoopPad = 11;
1623       }
1624 #endif // COMPILER2
1625 
1626       if (is_intel_modern_cpu()) { // Newest Intel cpus
1627         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1628           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1629         }
1630       }
1631     }
1632     if (is_atom_family() || is_knights_family()) {
1633 #ifdef COMPILER2
1634       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1635         OptoScheduling = true;
1636       }
1637 #endif
1638       if (supports_sse4_2()) { // Silvermont
1639         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1640           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1641         }
1642       }
1643       if (FLAG_IS_DEFAULT(UseIncDec)) {
1644         FLAG_SET_DEFAULT(UseIncDec, false);
1645       }
1646     }
1647     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1648       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1649     }
1650   }
1651 
1652 #ifdef COMPILER2
1653   if (UseAVX > 2) {
1654     if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1655         (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1656          ArrayOperationPartialInlineSize != 0 &&
1657          ArrayOperationPartialInlineSize != 16 &&
1658          ArrayOperationPartialInlineSize != 32 &&
1659          ArrayOperationPartialInlineSize != 64)) {
1660       int inline_size = 0;
1661       if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1662         inline_size = 64;
1663       } else if (MaxVectorSize >= 32) {
1664         inline_size = 32;
1665       } else if (MaxVectorSize >= 16) {
1666         inline_size = 16;
1667       }
1668       if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1669         warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1670       }
1671       ArrayOperationPartialInlineSize = inline_size;
1672     }
1673 
1674     if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1675       ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1676       if (ArrayOperationPartialInlineSize) {
1677         warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1678       } else {
1679         warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1680       }
1681     }
1682   }
1683 
1684   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1685     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1686       OptimizeFill = false;
1687     }
1688   }
1689 #endif
1690   if (supports_sse4_2()) {
1691     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1692       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1693     }
1694   } else if (UseSSE42Intrinsics) {
1695     if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1696       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1697     }
1698     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1699   }
1700   if (UseSSE42Intrinsics) {
1701     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1702       UseVectorizedMismatchIntrinsic = true;
1703     }
1704   } else if (UseVectorizedMismatchIntrinsic) {
1705     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1706       warning("vectorizedMismatch intrinsics are not available on this CPU");
1707     }
1708     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1709   }
1710   if (UseAVX >= 2) {
1711     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1712   } else if (UseVectorizedHashCodeIntrinsic) {
1713     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1714       warning("vectorizedHashCode intrinsics are not available on this CPU");
1715     }
1716     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1717   }
1718 
1719   // Use count leading zeros count instruction if available.
1720   if (supports_lzcnt()) {
1721     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1722       UseCountLeadingZerosInstruction = true;
1723     }
1724    } else if (UseCountLeadingZerosInstruction) {
1725     if (!FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1726       warning("lzcnt instruction is not available on this CPU");
1727     }
1728     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1729   }
1730 
1731   // Use count trailing zeros instruction if available
1732   if (supports_bmi1()) {
1733     // tzcnt does not require VEX prefix
1734     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1735       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1736         // Don't use tzcnt if BMI1 is switched off on command line.
1737         UseCountTrailingZerosInstruction = false;
1738       } else {
1739         UseCountTrailingZerosInstruction = true;
1740       }
1741     }
1742   } else if (UseCountTrailingZerosInstruction) {
1743     if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1744       warning("tzcnt instruction is not available on this CPU");
1745     }
1746     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1747   }
1748 
1749   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1750   // VEX prefix is generated only when AVX > 0.
1751   if (supports_bmi1() && supports_avx()) {
1752     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1753       UseBMI1Instructions = true;
1754     }
1755   } else if (UseBMI1Instructions) {
1756     if (!FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1757       warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1758     }
1759     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1760   }
1761 
1762   if (supports_bmi2() && supports_avx()) {
1763     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1764       UseBMI2Instructions = true;
1765     }
1766   } else if (UseBMI2Instructions) {
1767     if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1768       warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1769     }
1770     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1771   }
1772 
1773   // Use population count instruction if available.
1774   if (supports_popcnt()) {
1775     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1776       UsePopCountInstruction = true;
1777     }
1778   } else if (UsePopCountInstruction) {
1779     if (!FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1780       warning("POPCNT instruction is not available on this CPU");
1781     }
1782     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1783   }
1784 
1785   // Use fast-string operations if available.
1786   if (supports_erms()) {
1787     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1788       UseFastStosb = true;
1789     }
1790   } else if (UseFastStosb) {
1791     if (!FLAG_IS_DEFAULT(UseFastStosb)) {
1792       warning("fast-string operations are not available on this CPU");
1793     }
1794     FLAG_SET_DEFAULT(UseFastStosb, false);
1795   }
1796 
1797   // For AMD Processors use XMM/YMM MOVDQU instructions
1798   // for Object Initialization as default
1799   if (is_amd() && cpu_family() >= 0x19) {
1800     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1801       UseFastStosb = false;
1802     }
1803   }
1804 
1805 #ifdef COMPILER2
1806   if (is_intel() && MaxVectorSize > 16) {
1807     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1808       UseFastStosb = false;
1809     }
1810   }
1811 #endif
1812 
1813   // Use XMM/YMM MOVDQU instruction for Object Initialization
1814   if (!UseFastStosb && UseUnalignedLoadStores) {
1815     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1816       UseXMMForObjInit = true;
1817     }
1818   } else if (UseXMMForObjInit) {
1819     if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1820       warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1821     }
1822     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1823   }
1824 
1825 #ifdef COMPILER2
1826   if (FLAG_IS_DEFAULT(AlignVector)) {
1827     // Modern processors allow misaligned memory operations for vectors.
1828     AlignVector = !UseUnalignedLoadStores;
1829   }
1830 #endif // COMPILER2
1831 
1832   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1833     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1834       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1835     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1836       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1837     }
1838   }
1839 
1840   // Allocation prefetch settings
1841   int cache_line_size = checked_cast<int>(prefetch_data_size());
1842   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1843       (cache_line_size > AllocatePrefetchStepSize)) {
1844     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1845   }
1846 
1847   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1848     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1849     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1850       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1851     }
1852     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1853   }
1854 
1855   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1856     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1857     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1858   }
1859 
1860   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1861     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1862         is_intel_modern_cpu()) { // Nehalem based cpus
1863       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1864     }
1865 #ifdef COMPILER2
1866     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1867       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1868     }
1869 #endif
1870   }
1871 
1872   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1873 #ifdef COMPILER2
1874     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1875       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1876     }
1877 #endif
1878   }
1879 
1880   // Prefetch settings
1881 
1882   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1883   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1884   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1885   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1886 
1887   // gc copy/scan is disabled if prefetchw isn't supported, because
1888   // Prefetch::write emits an inlined prefetchw on Linux.
1889   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1890   // The used prefetcht0 instruction works for both amd64 and em64t.
1891 
1892   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1893     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1894   }
1895   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1896     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1897   }
1898 
1899   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1900      (cache_line_size > ContendedPaddingWidth))
1901     ContendedPaddingWidth = cache_line_size;
1902 
1903   // This machine allows unaligned memory accesses
1904   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1905     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1906   }
1907 
1908 #ifndef PRODUCT
1909   if (log_is_enabled(Info, os, cpu)) {
1910     LogStream ls(Log(os, cpu)::info());
1911     outputStream* log = &ls;
1912     log->print_cr("Logical CPUs per core: %u",
1913                   logical_processors_per_package());
1914     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1915     log->print("UseSSE=%d", UseSSE);
1916     if (UseAVX > 0) {
1917       log->print("  UseAVX=%d", UseAVX);
1918     }
1919     if (UseAES) {
1920       log->print("  UseAES=1");
1921     }
1922 #ifdef COMPILER2
1923     if (MaxVectorSize > 0) {
1924       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1925     }
1926 #endif
1927     log->cr();
1928     log->print("Allocation");
1929     if (AllocatePrefetchStyle <= 0) {
1930       log->print_cr(": no prefetching");
1931     } else {
1932       log->print(" prefetching: ");
1933       if (AllocatePrefetchInstr == 0) {
1934         log->print("PREFETCHNTA");
1935       } else if (AllocatePrefetchInstr == 1) {
1936         log->print("PREFETCHT0");
1937       } else if (AllocatePrefetchInstr == 2) {
1938         log->print("PREFETCHT2");
1939       } else if (AllocatePrefetchInstr == 3) {
1940         log->print("PREFETCHW");
1941       }
1942       if (AllocatePrefetchLines > 1) {
1943         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1944       } else {
1945         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1946       }
1947     }
1948 
1949     if (PrefetchCopyIntervalInBytes > 0) {
1950       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1951     }
1952     if (PrefetchScanIntervalInBytes > 0) {
1953       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1954     }
1955     if (ContendedPaddingWidth > 0) {
1956       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1957     }
1958   }
1959 #endif // !PRODUCT
1960   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1961       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1962   }
1963   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1964       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1965   }
1966   // CopyAVX3Threshold is the threshold at which 64-byte instructions are used
1967   // for implementing the array copy and clear operations.
1968   // The Intel platforms that supports the serialize instruction
1969   // have improved implementation of 64-byte load/stores and so the default
1970   // threshold is set to 0 for these platforms.
1971   if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
1972     if (is_intel() && is_intel_server_family() && supports_serialize()) {
1973       FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1974     } else {
1975       FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
1976     }
1977   }
1978 }
1979 
1980 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1981   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1982   if (vrt == XenHVM) {
1983     st->print_cr("Xen hardware-assisted virtualization detected");
1984   } else if (vrt == KVM) {
1985     st->print_cr("KVM virtualization detected");
1986   } else if (vrt == VMWare) {
1987     st->print_cr("VMWare virtualization detected");
1988     VirtualizationSupport::print_virtualization_info(st);
1989   } else if (vrt == HyperV) {
1990     st->print_cr("Hyper-V virtualization detected");
1991   } else if (vrt == HyperVRole) {
1992     st->print_cr("Hyper-V role detected");
1993   }
1994 }
1995 
// Returns true iff this Intel Core-family CPU's (model, stepping) pair
// appears in Intel's published list of parts affected by the Jump
// Conditional Code (JCC) erratum (see the PDF linked below). Non-Intel
// CPUs and models not in the table return false.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2063 
2064 // On Xen, the cpuid instruction returns
2065 //  eax / registers[0]: Version of Xen
2066 //  ebx / registers[1]: chars 'XenV'
2067 //  ecx / registers[2]: chars 'MMXe'
2068 //  edx / registers[3]: chars 'nVMM'
2069 //
2070 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2071 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2072 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2073 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2074 //
// More information:
2076 // https://kb.vmware.com/s/article/1009458
2077 //
2078 void VM_Version::check_virtualizations() {
2079   uint32_t registers[4] = {0};
2080   char signature[13] = {0};
2081 
2082   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2083   // from 0x40000000 until 0x40010000.
2084   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2085   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2086     detect_virt_stub(leaf, registers);
2087     memcpy(signature, &registers[1], 12);
2088 
2089     if (strncmp("VMwareVMware", signature, 12) == 0) {
2090       Abstract_VM_Version::_detected_virtualization = VMWare;
2091       // check for extended metrics from guestlib
2092       VirtualizationSupport::initialize();
2093     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2094       Abstract_VM_Version::_detected_virtualization = HyperV;
2095 #ifdef _WINDOWS
2096       // CPUID leaf 0x40000007 is available to the root partition only.
2097       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2098       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2099       detect_virt_stub(0x40000007, registers);
2100       if ((registers[0] != 0x0) ||
2101           (registers[1] != 0x0) ||
2102           (registers[2] != 0x0) ||
2103           (registers[3] != 0x0)) {
2104         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2105       }
2106 #endif
2107     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2108       Abstract_VM_Version::_detected_virtualization = KVM;
2109     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2110       Abstract_VM_Version::_detected_virtualization = XenHVM;
2111     }
2112   }
2113 }
2114 
2115 #ifdef COMPILER2
2116 // Determine if it's running on Cascade Lake using default options.
2117 bool VM_Version::is_default_intel_cascade_lake() {
2118   return FLAG_IS_DEFAULT(UseAVX) &&
2119          FLAG_IS_DEFAULT(MaxVectorSize) &&
2120          UseAVX > 2 &&
2121          is_intel_cascade_lake();
2122 }
2123 #endif
2124 
2125 bool VM_Version::is_intel_cascade_lake() {
2126   return is_intel_skylake() && _stepping >= 5;
2127 }
2128 
2129 bool VM_Version::is_intel_darkmont() {
2130   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2131 }
2132 
// Thin wrapper around the generated clear_apx_test_state stub (see the stub
// generation in VM_Version::initialize() below).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2136 
// Flipped to true at the end of VM_Version::initialize().
static bool _vm_version_initialized = false;

// Generate the cpuid helper stubs, run CPU feature detection, and detect any
// hypervisor we are running under. Exits the VM if the stub blob cannot be
// allocated.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate the cpuid / virtualization-detection / APX-state / brand-string
  // stubs into the blob above, then run feature detection.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                     g.generate_getCPUIDBrandString());
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2167 
// CPU family ids as reported by cpuid.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Extended-feature edx bits (bit positions noted per entry).
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// Feature bits from cpuid edx; these masks line up bit-for-bit with the
// _feature_edx_id name table below (e.g. FPU_FLAG = bit 0 = "On-Chip FPU").
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,  // size of _family_id_intel below
  ExtendedFamilyIdLength_AMD   = 24   // size of _family_id_amd below
};

const size_t VENDOR_LENGTH = 13;
// Extended brand string: presumably 3 cpuid leaves x 4 registers x 4 bytes,
// plus a terminating NUL (matches the arithmetic) — confirm against the
// brand-string reader.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2227 
// Intel family names, indexed by cpuid family id (see FamilyFlag above);
// empty strings mark ids with no designated name.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

// AMD family names, indexed by cpuid family id; empty strings mark ids with
// no designated name.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Model names for the Pentium Pro family (family 6), indexed by cpuid model
// number; nullptr-terminated, empty strings mark unnamed models.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2349 
/* Brand ID is for back compatibility
 * Newer CPUs use the extended brand string instead.
 * Indexed by brand id; nullptr-terminated, empty strings mark unused ids. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2364 
2365 
// Human-readable names for cpuid edx feature bits, indexed by bit position
// (matches the FeatureEdxFlag masks above; empty strings mark reserved bits).
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

// Names for extended-function cpuid edx bits, indexed by bit position
// (RDTSCP at bit 26 and Intel 64 at bit 28 match the
// _featureExtendedEdxFlag masks above... note those masks say bits 27/29 —
// TODO confirm which indexing is authoritative against the printing code).
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2435 
// Human-readable names for CPUID leaf 1, ECX feature bits: entry i names
// bit i. Empty strings mark reserved bits, which the printing loop in
// cpu_write_support_string() skips.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2470 
// Human-readable names for extended CPUID leaf 0x80000001, ECX feature
// bits: entry i names bit i. Empty strings mark bits not reported here.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2505 
2506 const char* VM_Version::cpu_model_description(void) {
2507   uint32_t cpu_family = extended_cpu_family();
2508   uint32_t cpu_model = extended_cpu_model();
2509   const char* model = nullptr;
2510 
2511   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2512     for (uint32_t i = 0; i <= cpu_model; i++) {
2513       model = _model_id_pentium_pro[i];
2514       if (model == nullptr) {
2515         break;
2516       }
2517     }
2518   }
2519   return model;
2520 }
2521 
2522 const char* VM_Version::cpu_brand_string(void) {
2523   if (_cpu_brand_string == nullptr) {
2524     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2525     if (nullptr == _cpu_brand_string) {
2526       return nullptr;
2527     }
2528     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2529     if (ret_val != OS_OK) {
2530       FREE_C_HEAP_ARRAY(_cpu_brand_string);
2531       _cpu_brand_string = nullptr;
2532     }
2533   }
2534   return _cpu_brand_string;
2535 }
2536 
2537 const char* VM_Version::cpu_brand(void) {
2538   const char*  brand  = nullptr;
2539 
2540   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2541     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2542     brand = _brand_id[0];
2543     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2544       brand = _brand_id[i];
2545     }
2546   }
2547   return brand;
2548 }
2549 
2550 bool VM_Version::cpu_is_em64t(void) {
2551   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2552 }
2553 
2554 bool VM_Version::is_netburst(void) {
2555   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2556 }
2557 
2558 bool VM_Version::supports_tscinv_ext(void) {
2559   if (!supports_tscinv_bit()) {
2560     return false;
2561   }
2562 
2563   if (is_intel()) {
2564     return true;
2565   }
2566 
2567   if (is_amd()) {
2568     return !is_amd_Barcelona();
2569   }
2570 
2571   if (is_hygon()) {
2572     return true;
2573   }
2574 
2575   return false;
2576 }
2577 
// Populates the rough topology estimates _no_of_threads, _no_of_sockets
// and _no_of_cores from CPUID data and the OS-visible processor count.
void VM_Version::resolve_cpu_information_details(void) {

  // in future we want to base this information on proper cpu
  // and cache topology enumeration such as:
  // Intel 64 Architecture Processor Topology Enumeration
  // which supports system cpu and cache topology enumeration
  // either using 2xAPICIDs or initial APICIDs

  // currently only rough cpu information estimates
  // which will not necessarily reflect the exact configuration of the system

  // this is the number of logical hardware threads
  // visible to the operating system
  _no_of_threads = os::processor_count();

  // find out number of threads per cpu package
  int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
  if (threads_per_package == 0) {
    // Fallback code to avoid div by zero in subsequent code.
    // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
    threads_per_package = threads_per_core() * cores_per_cpu();
  }

  // use amount of threads visible to the process in order to guess number of sockets
  _no_of_sockets = _no_of_threads / threads_per_package;

  // process might only see a subset of the total number of threads
  // from a single processor package. Virtualization/resource management for example.
  // If so then just write a hard 1 as num of pkgs.
  if (0 == _no_of_sockets) {
    _no_of_sockets = 1;
  }

  // estimate the number of cores
  _no_of_cores = cores_per_cpu() * _no_of_sockets;
}
2614 
2615 
2616 const char* VM_Version::cpu_family_description(void) {
2617   int cpu_family_id = extended_cpu_family();
2618   if (is_amd()) {
2619     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2620       return _family_id_amd[cpu_family_id];
2621     }
2622   }
2623   if (is_intel()) {
2624     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2625       return cpu_model_description();
2626     }
2627     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2628       return _family_id_intel[cpu_family_id];
2629     }
2630   }
2631   if (is_zx()) {
2632     int cpu_model_id = extended_cpu_model();
2633     if (cpu_family_id == 7) {
2634       switch (cpu_model_id) {
2635         case 0x1B:
2636           return "wudaokou";
2637         case 0x3B:
2638           return "lujiazui";
2639         case 0x5B:
2640           return "yongfeng";
2641         case 0x6B:
2642           return "shijidadao";
2643       }
2644     } else if (cpu_family_id == 6) {
2645       return "zhangjiang";
2646     }
2647   }
2648   if (is_hygon()) {
2649     return "Dhyana";
2650   }
2651   return "Unknown x86";
2652 }
2653 
2654 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2655   assert(buf != nullptr, "buffer is null!");
2656   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2657 
2658   const char* cpu_type = nullptr;
2659   const char* x64 = nullptr;
2660 
2661   if (is_intel()) {
2662     cpu_type = "Intel";
2663     x64 = cpu_is_em64t() ? " Intel64" : "";
2664   } else if (is_amd()) {
2665     cpu_type = "AMD";
2666     x64 = cpu_is_em64t() ? " AMD64" : "";
2667   } else if (is_zx()) {
2668     cpu_type = "Zhaoxin";
2669     x64 = cpu_is_em64t() ? " x86_64" : "";
2670   } else if (is_hygon()) {
2671     cpu_type = "Hygon";
2672     x64 = cpu_is_em64t() ? " AMD64" : "";
2673   } else {
2674     cpu_type = "Unknown x86";
2675     x64 = cpu_is_em64t() ? " x86_64" : "";
2676   }
2677 
2678   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2679     cpu_type,
2680     cpu_family_description(),
2681     supports_ht() ? " (HT)" : "",
2682     supports_sse3() ? " SSE3" : "",
2683     supports_ssse3() ? " SSSE3" : "",
2684     supports_sse4_1() ? " SSE4.1" : "",
2685     supports_sse4_2() ? " SSE4.2" : "",
2686     supports_sse4a() ? " SSE4A" : "",
2687     is_netburst() ? " Netburst" : "",
2688     is_intel_family_core() ? " Core" : "",
2689     x64);
2690 
2691   return OS_OK;
2692 }
2693 
// Copies the 48-byte CPUID extended brand string (leaves 0x80000002..4)
// into buf. buf must hold at least CPU_EBS_MAX_LENGTH bytes. Always
// returns OS_OK.
// NOTE(review): the stores below type-pun buf through uint32_t*; this
// assumes buf is suitably aligned and that the build tolerates the
// aliasing — presumably true for HotSpot's compile flags, but worth
// confirming before reusing this pattern elsewhere.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer; each proc_name_N holds 4 consecutive
  // characters of the brand string
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}
2718 
// Appends a comma-separated list of supported CPU feature names to buf.
// Returns the number of characters written (buf_len - 1 on truncation).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

// Appends 'string' (preceded by ", " after the first entry) to buf,
// bailing out with a truncated-length result if jio_snprintf fails.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // Each loop scans feature bits 0..29 against the matching name table;
  // empty names (reserved bits) are skipped.
  // NOTE(review): the 0x20000000 bound means bits 30-31 (e.g. "Pending
  // Break Enable", bit 31 of leaf 1 EDX) are never printed — presumably
  // intentional to avoid shifting into the sign bit; confirm upstream.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID leaf 1, ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Extended leaf 0x80000001, ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Extended leaf 0x80000001, EDX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Features that are not part of the bit/name tables above.
  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  if (supports_hybrid()) {
      WRITE_TO_BUF("Hybrid Architecture");
  }

  return written;
}
2779 
2780 /**
2781  * Write a detailed description of the cpu to a given buffer, including
2782  * feature set.
2783  */
2784 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2785   assert(buf != nullptr, "buffer is null!");
2786   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2787 
2788   static const char* unknown = "<unknown>";
2789   char               vendor_id[VENDOR_LENGTH];
2790   const char*        family = nullptr;
2791   const char*        model = nullptr;
2792   const char*        brand = nullptr;
2793   int                outputLen = 0;
2794 
2795   family = cpu_family_description();
2796   if (family == nullptr) {
2797     family = unknown;
2798   }
2799 
2800   model = cpu_model_description();
2801   if (model == nullptr) {
2802     model = unknown;
2803   }
2804 
2805   brand = cpu_brand_string();
2806 
2807   if (brand == nullptr) {
2808     brand = cpu_brand();
2809     if (brand == nullptr) {
2810       brand = unknown;
2811     }
2812   }
2813 
2814   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2815   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2816   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2817   vendor_id[VENDOR_LENGTH-1] = '\0';
2818 
2819   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2820     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2821     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2822     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2823     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2824     "Supports: ",
2825     brand,
2826     vendor_id,
2827     family,
2828     extended_cpu_family(),
2829     model,
2830     extended_cpu_model(),
2831     cpu_stepping(),
2832     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2833     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2834     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2835     _cpuid_info.std_cpuid1_eax.value,
2836     _cpuid_info.std_cpuid1_ebx.value,
2837     _cpuid_info.std_cpuid1_ecx.value,
2838     _cpuid_info.std_cpuid1_edx.value,
2839     _cpuid_info.ext_cpuid1_eax,
2840     _cpuid_info.ext_cpuid1_ebx,
2841     _cpuid_info.ext_cpuid1_ecx,
2842     _cpuid_info.ext_cpuid1_edx);
2843 
2844   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2845     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2846     return OS_ERR;
2847   }
2848 
2849   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2850 
2851   return OS_OK;
2852 }
2853 
2854 
// Fill in Abstract_VM_Version statics
// One-time population of the cached CPU name/description strings and
// topology counts; must run after VM_Version itself is initialized and
// before anyone reads the Abstract_VM_Version statics.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  // Topology counts first: the description helpers below may rely on them.
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2866 
2867 /**
2868  *  For information about extracting the frequency from the cpu brand string, please see:
2869  *
2870  *    Intel Processor Identification and the CPUID Instruction
2871  *    Application Note 485
2872  *    May 2012
2873  *
2874  * The return value is the frequency in Hz.
2875  */
2876 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2877   const char* const brand_string = cpu_brand_string();
2878   if (brand_string == nullptr) {
2879     return 0;
2880   }
2881   const int64_t MEGA = 1000000;
2882   int64_t multiplier = 0;
2883   int64_t frequency = 0;
2884   uint8_t idx = 0;
2885   // The brand string buffer is at most 48 bytes.
2886   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2887   for (; idx < 48-2; ++idx) {
2888     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2889     // Search brand string for "yHz" where y is M, G, or T.
2890     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2891       if (brand_string[idx] == 'M') {
2892         multiplier = MEGA;
2893       } else if (brand_string[idx] == 'G') {
2894         multiplier = MEGA * 1000;
2895       } else if (brand_string[idx] == 'T') {
2896         multiplier = MEGA * MEGA;
2897       }
2898       break;
2899     }
2900   }
2901   if (multiplier > 0) {
2902     // Compute frequency (in Hz) from brand string.
2903     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2904       frequency =  (brand_string[idx-4] - '0') * multiplier;
2905       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2906       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2907     } else { // format is "xxxx"
2908       frequency =  (brand_string[idx-4] - '0') * 1000;
2909       frequency += (brand_string[idx-3] - '0') * 100;
2910       frequency += (brand_string[idx-2] - '0') * 10;
2911       frequency += (brand_string[idx-1] - '0');
2912       frequency *= multiplier;
2913     }
2914   }
2915   return frequency;
2916 }
2917 
2918 
2919 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2920   if (_max_qualified_cpu_frequency == 0) {
2921     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2922   }
2923   return _max_qualified_cpu_frequency;
2924 }
2925 
2926 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2927   VM_Features vm_features;
2928   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2929     vm_features.set_feature(CPU_CX8);
2930   if (std_cpuid1_edx.bits.cmov != 0)
2931     vm_features.set_feature(CPU_CMOV);
2932   if (std_cpuid1_edx.bits.clflush != 0)
2933     vm_features.set_feature(CPU_FLUSH);
2934   // clflush should always be available on x86_64
2935   // if not we are in real trouble because we rely on it
2936   // to flush the code cache.
2937   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2938   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2939       ext_cpuid1_edx.bits.fxsr != 0))
2940     vm_features.set_feature(CPU_FXSR);
2941   // HT flag is set for multi-core processors also.
2942   if (threads_per_core() > 1)
2943     vm_features.set_feature(CPU_HT);
2944   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2945       ext_cpuid1_edx.bits.mmx != 0))
2946     vm_features.set_feature(CPU_MMX);
2947   if (std_cpuid1_edx.bits.sse != 0)
2948     vm_features.set_feature(CPU_SSE);
2949   if (std_cpuid1_edx.bits.sse2 != 0)
2950     vm_features.set_feature(CPU_SSE2);
2951   if (std_cpuid1_ecx.bits.sse3 != 0)
2952     vm_features.set_feature(CPU_SSE3);
2953   if (std_cpuid1_ecx.bits.ssse3 != 0)
2954     vm_features.set_feature(CPU_SSSE3);
2955   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2956     vm_features.set_feature(CPU_SSE4_1);
2957   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2958     vm_features.set_feature(CPU_SSE4_2);
2959   if (std_cpuid1_ecx.bits.popcnt != 0)
2960     vm_features.set_feature(CPU_POPCNT);
2961   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2962       xem_xcr0_eax.bits.apx_f != 0 &&
2963       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2964     vm_features.set_feature(CPU_APX_F);
2965   }
2966   if (std_cpuid1_ecx.bits.avx != 0 &&
2967       std_cpuid1_ecx.bits.osxsave != 0 &&
2968       xem_xcr0_eax.bits.sse != 0 &&
2969       xem_xcr0_eax.bits.ymm != 0) {
2970     vm_features.set_feature(CPU_AVX);
2971     vm_features.set_feature(CPU_VZEROUPPER);
2972     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2973       vm_features.set_feature(CPU_SHA512);
2974     if (std_cpuid1_ecx.bits.f16c != 0)
2975       vm_features.set_feature(CPU_F16C);
2976     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2977       vm_features.set_feature(CPU_AVX2);
2978       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2979         vm_features.set_feature(CPU_AVX_IFMA);
2980     }
2981     if (sef_cpuid7_ecx.bits.gfni != 0)
2982         vm_features.set_feature(CPU_GFNI);
2983     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2984         xem_xcr0_eax.bits.opmask != 0 &&
2985         xem_xcr0_eax.bits.zmm512 != 0 &&
2986         xem_xcr0_eax.bits.zmm32 != 0) {
2987       vm_features.set_feature(CPU_AVX512F);
2988       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2989         vm_features.set_feature(CPU_AVX512CD);
2990       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2991         vm_features.set_feature(CPU_AVX512DQ);
2992       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2993         vm_features.set_feature(CPU_AVX512_IFMA);
2994       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2995         vm_features.set_feature(CPU_AVX512PF);
2996       if (sef_cpuid7_ebx.bits.avx512er != 0)
2997         vm_features.set_feature(CPU_AVX512ER);
2998       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2999         vm_features.set_feature(CPU_AVX512BW);
3000       if (sef_cpuid7_ebx.bits.avx512vl != 0)
3001         vm_features.set_feature(CPU_AVX512VL);
3002       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
3003         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3004       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
3005         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3006       if (sef_cpuid7_ecx.bits.vaes != 0)
3007         vm_features.set_feature(CPU_AVX512_VAES);
3008       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
3009         vm_features.set_feature(CPU_AVX512_VNNI);
3010       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
3011         vm_features.set_feature(CPU_AVX512_BITALG);
3012       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
3013         vm_features.set_feature(CPU_AVX512_VBMI);
3014       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
3015         vm_features.set_feature(CPU_AVX512_VBMI2);
3016     }
3017     if (is_intel()) {
3018       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
3019           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
3020           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
3021           xem_xcr0_eax.bits.opmask != 0 &&
3022           xem_xcr0_eax.bits.zmm512 != 0 &&
3023           xem_xcr0_eax.bits.zmm32 != 0) {
3024         vm_features.set_feature(CPU_AVX10_1);
3025         vm_features.set_feature(CPU_AVX512F);
3026         vm_features.set_feature(CPU_AVX512CD);
3027         vm_features.set_feature(CPU_AVX512DQ);
3028         vm_features.set_feature(CPU_AVX512PF);
3029         vm_features.set_feature(CPU_AVX512ER);
3030         vm_features.set_feature(CPU_AVX512BW);
3031         vm_features.set_feature(CPU_AVX512VL);
3032         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3033         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3034         vm_features.set_feature(CPU_AVX512_VAES);
3035         vm_features.set_feature(CPU_AVX512_VNNI);
3036         vm_features.set_feature(CPU_AVX512_BITALG);
3037         vm_features.set_feature(CPU_AVX512_VBMI);
3038         vm_features.set_feature(CPU_AVX512_VBMI2);
3039         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3040           vm_features.set_feature(CPU_AVX10_2);
3041         }
3042       }
3043     }
3044   }
3045 
3046   if (std_cpuid1_ecx.bits.hv != 0)
3047     vm_features.set_feature(CPU_HV);
3048   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3049     vm_features.set_feature(CPU_BMI1);
3050   if (std_cpuid1_edx.bits.tsc != 0)
3051     vm_features.set_feature(CPU_TSC);
3052   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3053     vm_features.set_feature(CPU_TSCINV_BIT);
3054   if (std_cpuid1_ecx.bits.aes != 0)
3055     vm_features.set_feature(CPU_AES);
3056   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3057     vm_features.set_feature(CPU_LZCNT);
3058   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3059     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3060   if (sef_cpuid7_ebx.bits.erms != 0)
3061     vm_features.set_feature(CPU_ERMS);
3062   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3063     vm_features.set_feature(CPU_FSRM);
3064   if (std_cpuid1_ecx.bits.clmul != 0)
3065     vm_features.set_feature(CPU_CLMUL);
3066   if (sef_cpuid7_ebx.bits.rtm != 0)
3067     vm_features.set_feature(CPU_RTM);
3068   if (sef_cpuid7_ebx.bits.adx != 0)
3069      vm_features.set_feature(CPU_ADX);
3070   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3071     vm_features.set_feature(CPU_BMI2);
3072   if (sef_cpuid7_ebx.bits.sha != 0)
3073     vm_features.set_feature(CPU_SHA);
3074   if (std_cpuid1_ecx.bits.fma != 0)
3075     vm_features.set_feature(CPU_FMA);
3076   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3077     vm_features.set_feature(CPU_FLUSHOPT);
3078   if (sef_cpuid7_ebx.bits.clwb != 0)
3079     vm_features.set_feature(CPU_CLWB);
3080   if (ext_cpuid1_edx.bits.rdtscp != 0)
3081     vm_features.set_feature(CPU_RDTSCP);
3082   if (sef_cpuid7_ecx.bits.rdpid != 0)
3083     vm_features.set_feature(CPU_RDPID);
3084 
3085   // AMD|Hygon additional features.
3086   if (is_amd_family()) {
3087     // PREFETCHW was checked above, check TDNOW here.
3088     if ((ext_cpuid1_edx.bits.tdnow != 0))
3089       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3090     if (ext_cpuid1_ecx.bits.sse4a != 0)
3091       vm_features.set_feature(CPU_SSE4A);
3092   }
3093 
3094   // Intel additional features.
3095   if (is_intel()) {
3096     if (sef_cpuid7_edx.bits.serialize != 0)
3097       vm_features.set_feature(CPU_SERIALIZE);
3098     if (sef_cpuid7_edx.bits.hybrid != 0)
3099       vm_features.set_feature(CPU_HYBRID);
3100     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3101       vm_features.set_feature(CPU_AVX512_FP16);
3102   }
3103 
3104   // ZX additional features.
3105   if (is_zx()) {
3106     // We do not know if these are supported by ZX, so we cannot trust
3107     // common CPUID bit for them.
3108     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3109     vm_features.clear_feature(CPU_CLWB);
3110   }
3111 
3112   // Protection key features.
3113   if (sef_cpuid7_ecx.bits.pku != 0) {
3114     vm_features.set_feature(CPU_PKU);
3115   }
3116   if (sef_cpuid7_ecx.bits.ospke != 0) {
3117     vm_features.set_feature(CPU_OSPKE);
3118   }
3119 
3120   // Control flow enforcement (CET) features.
3121   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3122     vm_features.set_feature(CPU_CET_SS);
3123   }
3124   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3125     vm_features.set_feature(CPU_CET_IBT);
3126   }
3127 
3128   // Composite features.
3129   if (supports_tscinv_bit() &&
3130       ((is_amd_family() && !is_amd_Barcelona()) ||
3131        is_intel_tsc_synched_at_init())) {
3132     vm_features.set_feature(CPU_TSCINV);
3133   }
3134   return vm_features;
3135 }
3136 
3137 bool VM_Version::os_supports_avx_vectors() {
3138   bool retVal = false;
3139   int nreg = 4;
3140   if (supports_evex()) {
3141     // Verify that OS save/restore all bits of EVEX registers
3142     // during signal processing.
3143     retVal = true;
3144     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3145       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3146         retVal = false;
3147         break;
3148       }
3149     }
3150   } else if (supports_avx()) {
3151     // Verify that OS save/restore all bits of AVX registers
3152     // during signal processing.
3153     retVal = true;
3154     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3155       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3156         retVal = false;
3157         break;
3158       }
3159     }
3160     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3161     if (retVal == false) {
3162       // Verify that OS save/restore all bits of EVEX registers
3163       // during signal processing.
3164       retVal = true;
3165       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3166         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3167           retVal = false;
3168           break;
3169         }
3170       }
3171     }
3172   }
3173   return retVal;
3174 }
3175 
3176 bool VM_Version::os_supports_apx_egprs() {
3177   if (!supports_apx_f()) {
3178     return false;
3179   }
3180   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3181       _cpuid_info.apx_save[1] != egpr_test_value()) {
3182     return false;
3183   }
3184   return true;
3185 }
3186 
3187 uint VM_Version::cores_per_cpu() {
3188   uint result = 1;
3189   if (is_intel()) {
3190     bool supports_topology = supports_processor_topology();
3191     if (supports_topology) {
3192       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3193                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3194     }
3195     if (!supports_topology || result == 0) {
3196       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3197     }
3198   } else if (is_amd_family()) {
3199     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3200     if (cpu_family() >= 0x17) { // Zen or later
3201       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3202     }
3203   } else if (is_zx()) {
3204     bool supports_topology = supports_processor_topology();
3205     if (supports_topology) {
3206       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3207                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3208     }
3209     if (!supports_topology || result == 0) {
3210       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3211     }
3212   }
3213   return result;
3214 }
3215 
3216 uint VM_Version::threads_per_core() {
3217   uint result = 1;
3218   if (is_intel() && supports_processor_topology()) {
3219     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3220   } else if (is_zx() && supports_processor_topology()) {
3221     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3222   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3223     if (cpu_family() >= 0x17) {
3224       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3225     } else {
3226       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3227                  cores_per_cpu();
3228     }
3229   }
3230   return (result == 0 ? 1 : result);
3231 }
3232 
3233 uint VM_Version::L1_line_size() {
3234   uint result = 0;
3235   if (is_intel()) {
3236     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3237   } else if (is_amd_family()) {
3238     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3239   } else if (is_zx()) {
3240     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3241   }
3242   if (result < 32) // not defined ?
3243     result = 32;   // 32 bytes by default on x86 and other x64
3244   return result;
3245 }
3246 
3247 bool VM_Version::is_intel_tsc_synched_at_init() {
3248   if (is_intel_family_core()) {
3249     uint32_t ext_model = extended_cpu_model();
3250     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3251         ext_model == CPU_MODEL_WESTMERE_EP    ||
3252         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3253         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3254       // <= 2-socket invariant tsc support. EX versions are usually used
3255       // in > 2-socket systems and likely don't synchronize tscs at
3256       // initialization.
3257       // Code that uses tsc values must be prepared for them to arbitrarily
3258       // jump forward or backward.
3259       return true;
3260     }
3261   }
3262   return false;
3263 }
3264 
3265 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3266   // Hardware prefetching (distance/size in bytes):
3267   // Pentium 3 -  64 /  32
3268   // Pentium 4 - 256 / 128
3269   // Athlon    -  64 /  32 ????
3270   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3271   // Core      - 128 /  64
3272   //
3273   // Software prefetching (distance in bytes / instruction with best score):
3274   // Pentium 3 - 128 / prefetchnta
3275   // Pentium 4 - 512 / prefetchnta
3276   // Athlon    - 128 / prefetchnta
3277   // Opteron   - 256 / prefetchnta
3278   // Core      - 256 / prefetchnta
3279   // It will be used only when AllocatePrefetchStyle > 0
3280 
3281   if (is_amd_family()) { // AMD | Hygon
3282     if (supports_sse2()) {
3283       return 256; // Opteron
3284     } else {
3285       return 128; // Athlon
3286     }
3287   } else if (is_zx()) {
3288     if (supports_sse2()) {
3289       return 256;
3290     } else {
3291       return 128;
3292     }
3293   } else { // Intel
3294     if (supports_sse3() && is_intel_server_family()) {
3295       if (is_intel_modern_cpu()) { // Nehalem based cpus
3296         return 192;
3297       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3298         return 384;
3299       }
3300     }
3301     if (supports_sse2()) {
3302       if (is_intel_server_family()) {
3303         return 256; // Pentium M, Core, Core2
3304       } else {
3305         return 512; // Pentium 4
3306       }
3307     } else {
3308       return 128; // Pentium 3 (and all other old CPUs)
3309     }
3310   }
3311 }
3312 
3313 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3314   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3315   switch (id) {
3316   case vmIntrinsics::_floatToFloat16:
3317   case vmIntrinsics::_float16ToFloat:
3318     if (!supports_float16()) {
3319       return false;
3320     }
3321     break;
3322   default:
3323     break;
3324   }
3325   return true;
3326 }
3327 
3328 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3329   int i = 0;
3330   ss.join([&]() {
3331     const char* str = nullptr;
3332     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3333       if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3334         str = _features_names[i];
3335       }
3336       i += 1;
3337     }
3338     return str;
3339   }, ", ");
3340 }
3341 
3342 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3343   VM_Features* features = (VM_Features*)features_buffer;
3344   insert_features_names(*features, ss);
3345 }
3346 
3347 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3348   VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3349   VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3350   int i = 0;
3351   ss.join([&]() {
3352     const char* str = nullptr;
3353     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3354       Feature_Flag flag = (Feature_Flag)i;
3355       if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3356         str = _features_names[i];
3357       }
3358       i += 1;
3359     }
3360     return str;
3361   }, ", ");
3362 }
3363 
3364 int VM_Version::cpu_features_size() {
3365   return sizeof(VM_Features);
3366 }
3367 
3368 void VM_Version::store_cpu_features(void* buf) {
3369   VM_Features copy = _features;
3370   copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3371   memcpy(buf, &copy, sizeof(VM_Features));
3372 }
3373 
3374 bool VM_Version::supports_features(void* features_buffer) {
3375   VM_Features* features_to_test = (VM_Features*)features_buffer;
3376   return _features.supports_features(features_to_test);
3377 }