1 /*
   2  * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
// CPU identity fields (family/model/stepping) — populated elsewhere in this
// file from the raw CPUID results gathered by the stubs below.
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
// Whether the Intel JCC (Jump Conditional Code) erratum mitigation applies
// on this CPU — set outside this view; TODO confirm where.
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID output gathered by the generated get_cpu_info stub;
// zero-initialized so unqueried leaves read as 0.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into one printable name string per feature bit.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code blob that will hold the generated detection stubs, and its fixed
// size budget in bytes.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// Number of 64-bit words in the features bitmap.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

// Feature bitmaps; presumably _cpu_features is the raw hardware set and
// _features the effective set after flag processing — populated outside
// this view; TODO confirm against get_processor_features.
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// C-convention entry-point types for the generated stubs.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}
// Stub entry points, filled in once the stubs have been generated.
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
  82 
  83 bool VM_Version::supports_clflush() {
  84   // clflush should always be available on x86_64
  85   // if not we are in real trouble because we rely on it
  86   // to flush the code cache.
  87   // Unfortunately, Assembler::clflush is currently called as part
  88   // of generation of the code cache flush routine. This happens
  89   // under Universe::init before the processor features are set
  90   // up. Assembler::flush calls this routine to check that clflush
  91   // is allowed. So, we give the caller a free pass if Universe init
  92   // is still in progress.
  93   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  94   return true;
  95 }
  96 
// CPUID leaf (function) numbers used by the stubs below.
// Standard leaves (EAX input values):
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves; 0x80000000 reports the maximum supported extended leaf,
// and 0x80000002..0x80000004 return the processor brand string.
#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
 109 
// Emits small machine-code stubs that are run once at startup to probe the
// CPU:
//  - clear_apx_test_state: zeroes two extended GPRs ahead of a signal test.
//  - get_cpu_info: walks the CPUID leaves HotSpot cares about and records
//    the raw register results into a VM_Version::CpuidInfo struct, and
//    triggers deliberate SEGVs to verify the OS restores extended
//    (EGPR/YMM/ZMM) register state across signal handling.
//  - detect_virt: one raw CPUID invocation for hypervisor detection.
//  - getCPUIDBrandString: retrieves the processor brand string.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Zeroes r16 and r31 so a subsequent signal round-trip can prove the OS
  // re-materialized EGPR state (rather than the registers simply never
  // having been modified).
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    // Temporarily enable APX so the assembler accepts EGPR operands.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Generates the main CPU-detection stub:
  //   void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
  // Fills the CpuidInfo struct pointed to by the argument with raw CPUID
  // results, then performs save/SEGV/restore checks for APX and AVX state.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    // EVEX probing is done unless the user explicitly capped UseAVX <= 2.
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    // Record leaf 0 (vendor string + max standard leaf in eax).
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    // NOTE(review): leaves 0x29/0x24 below are queried without gating on
    // the maximum supported standard leaf recorded from leaf 0;
    // presumably consumers validate these fields against feature bits
    // before trusting them — TODO confirm.

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    // Note: the extended leaves are recorded in descending order; each
    // section falls through to the next-lower supported leaf.
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX is available: load test values into EGPRs, take a deliberate
    // SEGV, and record the register contents observed after the signal
    // so the caller can verify the OS preserved them.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    //
    // Query CPUID 0xD.19 for APX XSAVE offset
    // Extended State Enumeration Sub-leaf 19 (APX)
    // EAX = size of APX state (should be 128)
    // EBX = offset in standard XSAVE format
    //
    __ movl(rax, 0xD);
    __ movl(rcx, 19);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
    __ movl(Address(rsi, 0), rbx);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    // UseAVX/UseSSE are flipped temporarily (at stub-generation time) so
    // the assembler will emit AVX/EVEX encodings; restored below.
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      // Record post-signal ZMM contents for later verification.
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore the callee-saved XMM registers spilled above (reverse order).
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    // Record post-signal YMM contents for later verification.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore the callee-saved XMM registers spilled above (reverse order).
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits a vzeroupper (or jumps to L_wrapup) — skipped on Knights
  // Landing/Mill (Xeon Phi) parts where it is not wanted.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Generates: void detect_virt(uint32_t leaf, uint32_t* regs);
  // Executes CPUID for the given leaf and stores eax/ebx/ecx/edx into the
  // caller-supplied 4-element array (used for hypervisor detection).
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Generates: void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
  // Retrieves the 48-byte processor brand string via extended CPUID leaves
  // 0x80000002..0x80000004 into the proc_name_* slots of CpuidInfo.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 874 
 875 void VM_Version::get_processor_features() {
 876 
 877   _cpu = 4; // 486 by default
 878   _model = 0;
 879   _stepping = 0;
 880   _logical_processors_per_package = 1;
 881   // i486 internal cache is both I&D and has a 16-byte line size
 882   _L1_data_cache_line_size = 16;
 883 
 884   // Get raw processor info
 885 
 886   get_cpu_info_stub(&_cpuid_info);
 887 
 888   assert_is_initialized();
 889   _cpu = extended_cpu_family();
 890   _model = extended_cpu_model();
 891   _stepping = cpu_stepping();
 892 
 893   if (cpu_family() > 4) { // it supports CPUID
 894     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 895     _cpu_features = _features; // Preserve features
 896     // Logical processors are only available on P4s and above,
 897     // and only if hyperthreading is available.
 898     _logical_processors_per_package = logical_processor_count();
 899     _L1_data_cache_line_size = L1_line_size();
 900   }
 901 
 902   // xchg and xadd instructions
 903   _supports_atomic_getset4 = true;
 904   _supports_atomic_getadd4 = true;
 905   _supports_atomic_getset8 = true;
 906   _supports_atomic_getadd8 = true;
 907 
 908   // OS should support SSE for x64 and hardware should support at least SSE2.
 909   if (!VM_Version::supports_sse2()) {
 910     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 911   }
 912   // in 64 bit the use of SSE2 is the minimum
 913   if (UseSSE < 2) UseSSE = 2;
 914 
 915   // flush_icache_stub have to be generated first.
 916   // That is why Icache line size is hard coded in ICache class,
 917   // see icache_x86.hpp. It is also the reason why we can't use
 918   // clflush instruction in 32-bit VM since it could be running
 919   // on CPU which does not support it.
 920   //
 921   // The only thing we can do is to verify that flushed
 922   // ICache::line_size has correct value.
 923   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 924   // clflush_size is size in quadwords (8 bytes).
 925   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 926 
 927   // assigning this field effectively enables Unsafe.writebackMemory()
 928   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 929   // that is only implemented on x86_64 and only if the OS plays ball
 930   if (os::supports_map_sync()) {
 931     // publish data cache line flush size to generic field, otherwise
    // let it default to zero, thereby disabling writeback
 933     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 934   }
 935 
 936   // Check if processor has Intel Ecore
 937   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 938     (supports_hybrid() ||
 939      _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
 940      _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
 941     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 942   }
 943 
 944   if (UseSSE < 4) {
 945     _features.clear_feature(CPU_SSE4_1);
 946     _features.clear_feature(CPU_SSE4_2);
 947   }
 948 
 949   if (UseSSE < 3) {
 950     _features.clear_feature(CPU_SSE3);
 951     _features.clear_feature(CPU_SSSE3);
 952     _features.clear_feature(CPU_SSE4A);
 953   }
 954 
 955   if (UseSSE < 2)
 956     _features.clear_feature(CPU_SSE2);
 957 
 958   if (UseSSE < 1)
 959     _features.clear_feature(CPU_SSE);
 960 
  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
 962   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 963     UseAVX = 0;
 964   }
 965 
 966   // UseSSE is set to the smaller of what hardware supports and what
 967   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 968   // older Pentiums which do not support it.
 969   int use_sse_limit = 0;
 970   if (UseSSE > 0) {
 971     if (UseSSE > 3 && supports_sse4_1()) {
 972       use_sse_limit = 4;
 973     } else if (UseSSE > 2 && supports_sse3()) {
 974       use_sse_limit = 3;
 975     } else if (UseSSE > 1 && supports_sse2()) {
 976       use_sse_limit = 2;
 977     } else if (UseSSE > 0 && supports_sse()) {
 978       use_sse_limit = 1;
 979     } else {
 980       use_sse_limit = 0;
 981     }
 982   }
 983   if (FLAG_IS_DEFAULT(UseSSE)) {
 984     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 985   } else if (UseSSE > use_sse_limit) {
 986     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 987     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 988   }
 989 
 990   // first try initial setting and detect what we can support
 991   int use_avx_limit = 0;
 992   if (UseAVX > 0) {
 993     if (UseSSE < 4) {
 994       // Don't use AVX if SSE is unavailable or has been disabled.
 995       use_avx_limit = 0;
 996     } else if (UseAVX > 2 && supports_evex()) {
 997       use_avx_limit = 3;
 998     } else if (UseAVX > 1 && supports_avx2()) {
 999       use_avx_limit = 2;
1000     } else if (UseAVX > 0 && supports_avx()) {
1001       use_avx_limit = 1;
1002     } else {
1003       use_avx_limit = 0;
1004     }
1005   }
1006   if (FLAG_IS_DEFAULT(UseAVX)) {
1007     // Don't use AVX-512 on older Skylakes unless explicitly requested.
1008     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1009       FLAG_SET_DEFAULT(UseAVX, 2);
1010     } else {
1011       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1012     }
1013   }
1014 
1015   if (UseAVX > use_avx_limit) {
1016     if (UseSSE < 4) {
1017       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1018     } else {
1019       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1020     }
1021     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1022   }
1023 
1024   if (UseAVX < 3) {
1025     _features.clear_feature(CPU_AVX512F);
1026     _features.clear_feature(CPU_AVX512DQ);
1027     _features.clear_feature(CPU_AVX512CD);
1028     _features.clear_feature(CPU_AVX512BW);
1029     _features.clear_feature(CPU_AVX512ER);
1030     _features.clear_feature(CPU_AVX512PF);
1031     _features.clear_feature(CPU_AVX512VL);
1032     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1033     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1034     _features.clear_feature(CPU_AVX512_VAES);
1035     _features.clear_feature(CPU_AVX512_VNNI);
1036     _features.clear_feature(CPU_AVX512_VBMI);
1037     _features.clear_feature(CPU_AVX512_VBMI2);
1038     _features.clear_feature(CPU_AVX512_BITALG);
1039     _features.clear_feature(CPU_AVX512_IFMA);
1040     _features.clear_feature(CPU_APX_F);
1041     _features.clear_feature(CPU_AVX512_FP16);
1042     _features.clear_feature(CPU_AVX10_1);
1043     _features.clear_feature(CPU_AVX10_2);
1044   }
1045 
1046 
1047   if (UseAVX < 2) {
1048     _features.clear_feature(CPU_AVX2);
1049     _features.clear_feature(CPU_AVX_IFMA);
1050   }
1051 
1052   if (UseAVX < 1) {
1053     _features.clear_feature(CPU_AVX);
1054     _features.clear_feature(CPU_VZEROUPPER);
1055     _features.clear_feature(CPU_F16C);
1056     _features.clear_feature(CPU_SHA512);
1057   }
1058 
1059   if (logical_processors_per_package() == 1) {
1060     // HT processor could be installed on a system which doesn't support HT.
1061     _features.clear_feature(CPU_HT);
1062   }
1063 
1064   if (is_intel()) { // Intel cpus specific settings
1065     if (is_knights_family()) {
1066       _features.clear_feature(CPU_VZEROUPPER);
1067       _features.clear_feature(CPU_AVX512BW);
1068       _features.clear_feature(CPU_AVX512VL);
1069       _features.clear_feature(CPU_APX_F);
1070       _features.clear_feature(CPU_AVX512DQ);
1071       _features.clear_feature(CPU_AVX512_VNNI);
1072       _features.clear_feature(CPU_AVX512_VAES);
1073       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1074       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1075       _features.clear_feature(CPU_AVX512_VBMI);
1076       _features.clear_feature(CPU_AVX512_VBMI2);
1077       _features.clear_feature(CPU_CLWB);
1078       _features.clear_feature(CPU_FLUSHOPT);
1079       _features.clear_feature(CPU_GFNI);
1080       _features.clear_feature(CPU_AVX512_BITALG);
1081       _features.clear_feature(CPU_AVX512_IFMA);
1082       _features.clear_feature(CPU_AVX_IFMA);
1083       _features.clear_feature(CPU_AVX512_FP16);
1084       _features.clear_feature(CPU_AVX10_1);
1085       _features.clear_feature(CPU_AVX10_2);
1086     }
1087   }
1088 
  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
1090   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1091   if (UseAPX && !apx_supported) {
1092     warning("UseAPX is not supported on this CPU, setting it to false");
1093     FLAG_SET_DEFAULT(UseAPX, false);
1094   }
1095 
1096   if (!UseAPX) {
1097     _features.clear_feature(CPU_APX_F);
1098   }
1099 
1100   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1101     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1102     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1103   } else {
1104     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1105   }
1106 
1107   assert(supports_clflush(), "Always present");
1108   if (X86ICacheSync == -1) {
1109     // Auto-detect, choosing the best performant one that still flushes
1110     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1111     if (supports_clwb()) {
1112       FLAG_SET_ERGO(X86ICacheSync, 3);
1113     } else if (supports_clflushopt()) {
1114       FLAG_SET_ERGO(X86ICacheSync, 2);
1115     } else {
1116       FLAG_SET_ERGO(X86ICacheSync, 1);
1117     }
1118   } else {
1119     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1120       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1121     }
1122     if ((X86ICacheSync == 3) && !supports_clwb()) {
1123       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1124     }
1125     if ((X86ICacheSync == 5) && !supports_serialize()) {
1126       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1127     }
1128   }
1129 
1130   stringStream ss(2048);
1131   if (supports_hybrid()) {
1132     ss.print("(hybrid)");
1133   } else {
1134     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1135   }
1136   ss.print(" family %d model %d stepping %d microcode 0x%x",
1137            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1138   ss.print(", ");
1139   int features_offset = (int)ss.size();
1140   insert_features_names(_features, ss);
1141 
1142   _cpu_info_string = ss.as_string(true);
1143   _features_string = _cpu_info_string + features_offset;
1144 
1145   // Use AES instructions if available.
1146   if (supports_aes()) {
1147     if (FLAG_IS_DEFAULT(UseAES)) {
1148       FLAG_SET_DEFAULT(UseAES, true);
1149     }
1150     if (!UseAES) {
1151       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1152         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1153       }
1154       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1155       if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1156         warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1157       }
1158       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1159     } else {
1160       if (UseSSE > 2) {
1161         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1162           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1163         }
1164       } else {
1165         // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
1167         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1168           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1169         }
1170         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1171       }
1172 
1173       // --AES-CTR begins--
1174       if (!UseAESIntrinsics) {
1175         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1176           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1177         }
1178         FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1179       } else {
1180         if (supports_sse4_1()) {
1181           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1182             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1183           }
1184         } else {
1185            // The AES-CTR intrinsic stubs require AES instruction support (of course)
           // but also require sse4.1 mode or higher for the instructions they use.
1187           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1188              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1189            }
1190            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1191         }
1192       }
1193       // --AES-CTR ends--
1194     }
1195   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1196     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1197       warning("AES instructions are not available on this CPU");
1198     }
1199     FLAG_SET_DEFAULT(UseAES, false);
1200     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1201       warning("AES intrinsics are not available on this CPU");
1202     }
1203     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1204     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1205       warning("AES-CTR intrinsics are not available on this CPU");
1206     }
1207     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1208   }
1209 
1210   // Use CLMUL instructions if available.
1211   if (supports_clmul()) {
1212     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1213       UseCLMUL = true;
1214     }
1215   } else if (UseCLMUL) {
1216     if (!FLAG_IS_DEFAULT(UseCLMUL))
1217       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1218     FLAG_SET_DEFAULT(UseCLMUL, false);
1219   }
1220 
1221   if (UseCLMUL && (UseSSE > 2)) {
1222     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1223       UseCRC32Intrinsics = true;
1224     }
1225   } else if (UseCRC32Intrinsics) {
1226     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1227       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1228     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1229   }
1230 
1231   if (supports_avx2()) {
1232     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1233       UseAdler32Intrinsics = true;
1234     }
1235   } else if (UseAdler32Intrinsics) {
1236     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1237       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1238     }
1239     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1240   }
1241 
1242   if (supports_sse4_2() && supports_clmul()) {
1243     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1244       UseCRC32CIntrinsics = true;
1245     }
1246   } else if (UseCRC32CIntrinsics) {
1247     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1248       warning("CRC32C intrinsics are not available on this CPU");
1249     }
1250     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1251   }
1252 
1253   // GHASH/GCM intrinsics
1254   if (UseCLMUL && (UseSSE > 2)) {
1255     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1256       UseGHASHIntrinsics = true;
1257     }
1258   } else if (UseGHASHIntrinsics) {
1259     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1260       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1261     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1262   }
1263 
1264   // ChaCha20 Intrinsics
1265   // As long as the system supports AVX as a baseline we can do a
1266   // SIMD-enabled block function.  StubGenerator makes the determination
1267   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1268   // version.
1269   if (UseAVX >= 1) {
1270       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1271           UseChaCha20Intrinsics = true;
1272       }
1273   } else if (UseChaCha20Intrinsics) {
1274       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1275           warning("ChaCha20 intrinsic requires AVX instructions");
1276       }
1277       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1278   }
1279 
1280   // Kyber Intrinsics
1281   // Currently we only have them for AVX512
1282   if (supports_evex() && supports_avx512bw()) {
1283       if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1284           UseKyberIntrinsics = true;
1285       }
1286   } else
1287   if (UseKyberIntrinsics) {
1288      warning("Intrinsics for ML-KEM are not available on this CPU.");
1289      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1290   }
1291 
1292   // Dilithium Intrinsics
1293   if (UseAVX > 1) {
1294       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1295           UseDilithiumIntrinsics = true;
1296       }
1297   } else if (UseDilithiumIntrinsics) {
1298       warning("Intrinsics for ML-DSA are not available on this CPU.");
1299       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1300   }
1301 
1302   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1303   if (UseAVX >= 2) {
1304     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1305       UseBASE64Intrinsics = true;
1306     }
1307   } else if (UseBASE64Intrinsics) {
1308      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1309       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1310     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1311   }
1312 
1313   if (supports_fma()) {
1314     if (FLAG_IS_DEFAULT(UseFMA)) {
1315       UseFMA = true;
1316     }
1317   } else if (UseFMA) {
1318     warning("FMA instructions are not available on this CPU");
1319     FLAG_SET_DEFAULT(UseFMA, false);
1320   }
1321 
1322   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1323     UseMD5Intrinsics = true;
1324   }
1325 
1326   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1327     if (FLAG_IS_DEFAULT(UseSHA)) {
1328       UseSHA = true;
1329     }
1330   } else if (UseSHA) {
1331     warning("SHA instructions are not available on this CPU");
1332     FLAG_SET_DEFAULT(UseSHA, false);
1333   }
1334 
1335   if (supports_sha() && supports_sse4_1() && UseSHA) {
1336     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1337       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1338     }
1339   } else if (UseSHA1Intrinsics) {
1340     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1341     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1342   }
1343 
1344   if (supports_sse4_1() && UseSHA) {
1345     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1346       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1347     }
1348   } else if (UseSHA256Intrinsics) {
1349     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1350     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1351   }
1352 
1353   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1354     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1355       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1356     }
1357   } else if (UseSHA512Intrinsics) {
1358     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1359     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1360   }
1361 
1362   if (UseSHA && supports_evex() && supports_avx512bw()) {
1363     if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1364       FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1365     }
1366   } else if (UseSHA3Intrinsics) {
1367     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1368     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1369   }
1370 
1371   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics || UseSHA3Intrinsics)) {
1372     FLAG_SET_DEFAULT(UseSHA, false);
1373   }
1374 
1375 #if COMPILER2_OR_JVMCI
1376   int max_vector_size = 0;
1377   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1378     // 16 byte vectors (in XMM) are supported with SSE2+
1379     max_vector_size = 16;
1380   } else if (UseAVX == 1 || UseAVX == 2) {
1381     // 32 bytes vectors (in YMM) are only supported with AVX+
1382     max_vector_size = 32;
1383   } else if (UseAVX > 2) {
1384     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1385     max_vector_size = 64;
1386   }
1387 
1388   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1389 
1390   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1391     if (MaxVectorSize < min_vector_size) {
1392       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1393       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1394     }
1395     if (MaxVectorSize > max_vector_size) {
1396       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1397       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1398     }
1399     if (!is_power_of_2(MaxVectorSize)) {
1400       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1401       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1402     }
1403   } else {
1404     // If default, use highest supported configuration
1405     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1406   }
1407 
1408 #if defined(COMPILER2) && defined(ASSERT)
1409   if (MaxVectorSize > 0) {
1410     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1411       tty->print_cr("State of YMM registers after signal handle:");
1412       int nreg = 4;
1413       const char* ymm_name[4] = {"0", "7", "8", "15"};
1414       for (int i = 0; i < nreg; i++) {
1415         tty->print("YMM%s:", ymm_name[i]);
1416         for (int j = 7; j >=0; j--) {
1417           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1418         }
1419         tty->cr();
1420       }
1421     }
1422   }
1423 #endif // COMPILER2 && ASSERT
1424 
1425   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1426     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1427       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1428     }
1429   } else if (UsePoly1305Intrinsics) {
1430     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1431     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1432   }
1433 
1434   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1435     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1436       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1437     }
1438   } else if (UseIntPolyIntrinsics) {
1439     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1440     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1441   }
1442 
1443   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1444     UseMultiplyToLenIntrinsic = true;
1445   }
1446   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1447     UseSquareToLenIntrinsic = true;
1448   }
1449   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1450     UseMulAddIntrinsic = true;
1451   }
1452   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1453     UseMontgomeryMultiplyIntrinsic = true;
1454   }
1455   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1456     UseMontgomerySquareIntrinsic = true;
1457   }
1458 #endif // COMPILER2_OR_JVMCI
1459 
1460   // On new cpus instructions which update whole XMM register should be used
1461   // to prevent partial register stall due to dependencies on high half.
1462   //
1463   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1464   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1465   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1466   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1467 
1468 
1469   if (is_zx()) { // ZX cpus specific settings
1470     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1471       UseStoreImmI16 = false; // don't use it on ZX cpus
1472     }
1473     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1474       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1475         // Use it on all ZX cpus
1476         UseAddressNop = true;
1477       }
1478     }
1479     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1480       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1481     }
1482     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1483       if (supports_sse3()) {
1484         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1485       } else {
1486         UseXmmRegToRegMoveAll = false;
1487       }
1488     }
1489     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1490 #ifdef COMPILER2
1491       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1492         // For new ZX cpus do the next optimization:
1493         // don't align the beginning of a loop if there are enough instructions
1494         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1495         // in current fetch line (OptoLoopAlignment) or the padding
1496         // is big (> MaxLoopPad).
1497         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1498         // generated NOP instructions. 11 is the largest size of one
1499         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1500         MaxLoopPad = 11;
1501       }
1502 #endif // COMPILER2
1503       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1504         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1505       }
1506       if (supports_sse4_2()) { // new ZX cpus
1507         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1508           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1509         }
1510       }
1511     }
1512 
1513     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1514       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1515     }
1516   }
1517 
1518   if (is_amd_family()) { // AMD cpus specific settings
1519     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1520       // Use it on new AMD cpus starting from Opteron.
1521       UseAddressNop = true;
1522     }
1523     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1524       // Use it on new AMD cpus starting from Opteron.
1525       UseNewLongLShift = true;
1526     }
1527     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1528       if (supports_sse4a()) {
1529         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1530       } else {
1531         UseXmmLoadAndClearUpper = false;
1532       }
1533     }
1534     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1535       if (supports_sse4a()) {
1536         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1537       } else {
1538         UseXmmRegToRegMoveAll = false;
1539       }
1540     }
1541     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1542       if (supports_sse4a()) {
1543         UseXmmI2F = true;
1544       } else {
1545         UseXmmI2F = false;
1546       }
1547     }
1548     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1549       if (supports_sse4a()) {
1550         UseXmmI2D = true;
1551       } else {
1552         UseXmmI2D = false;
1553       }
1554     }
1555 
1556     // some defaults for AMD family 15h
1557     if (cpu_family() == 0x15) {
1558       // On family 15h processors default is no sw prefetch
1559       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1560         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1561       }
1562       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1563       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1564         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1565       }
1566       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1567       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1568         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1569       }
1570       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1571         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1572       }
1573     }
1574 
1575 #ifdef COMPILER2
1576     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1577       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1578       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1579     }
1580 #endif // COMPILER2
1581 
1582     // Some defaults for AMD family >= 17h && Hygon family 18h
1583     if (cpu_family() >= 0x17) {
1584       // On family >=17h processors use XMM and UnalignedLoadStores
1585       // for Array Copy
1586       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1587         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1588       }
1589       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1590         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1591       }
1592 #ifdef COMPILER2
1593       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1594         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1595       }
1596 #endif
1597     }
1598   }
1599 
1600   if (is_intel()) { // Intel cpus specific settings
1601     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1602       UseStoreImmI16 = false; // don't use it on Intel cpus
1603     }
1604     if (is_intel_server_family() || cpu_family() == 15) {
1605       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1606         // Use it on all Intel cpus starting from PentiumPro
1607         UseAddressNop = true;
1608       }
1609     }
1610     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1611       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1612     }
1613     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1614       if (supports_sse3()) {
1615         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1616       } else {
1617         UseXmmRegToRegMoveAll = false;
1618       }
1619     }
1620     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1621 #ifdef COMPILER2
1622       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1623         // For new Intel cpus do the next optimization:
1624         // don't align the beginning of a loop if there are enough instructions
1625         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1626         // in current fetch line (OptoLoopAlignment) or the padding
1627         // is big (> MaxLoopPad).
1628         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1629         // generated NOP instructions. 11 is the largest size of one
1630         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1631         MaxLoopPad = 11;
1632       }
1633 #endif // COMPILER2
1634 
1635       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1636         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1637       }
1638       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1639         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1640           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1641         }
1642       }
1643     }
1644     if (is_atom_family() || is_knights_family()) {
1645 #ifdef COMPILER2
1646       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1647         OptoScheduling = true;
1648       }
1649 #endif
1650       if (supports_sse4_2()) { // Silvermont
1651         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1652           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1653         }
1654       }
1655       if (FLAG_IS_DEFAULT(UseIncDec)) {
1656         FLAG_SET_DEFAULT(UseIncDec, false);
1657       }
1658     }
1659     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1660       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1661     }
1662   }
1663 
1664 #ifdef COMPILER2
1665   if (UseAVX > 2) {
1666     if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1667         (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1668          ArrayOperationPartialInlineSize != 0 &&
1669          ArrayOperationPartialInlineSize != 16 &&
1670          ArrayOperationPartialInlineSize != 32 &&
1671          ArrayOperationPartialInlineSize != 64)) {
1672       int inline_size = 0;
1673       if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1674         inline_size = 64;
1675       } else if (MaxVectorSize >= 32) {
1676         inline_size = 32;
1677       } else if (MaxVectorSize >= 16) {
1678         inline_size = 16;
1679       }
1680       if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1681         warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1682       }
1683       ArrayOperationPartialInlineSize = inline_size;
1684     }
1685 
1686     if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1687       ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1688       if (ArrayOperationPartialInlineSize) {
1689         warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1690       } else {
1691         warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1692       }
1693     }
1694   }
1695 
1696   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1697     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1698       OptimizeFill = false;
1699     }
1700   }
1701 #endif
1702   if (supports_sse4_2()) {
1703     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1704       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1705     }
1706   } else {
1707     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1708       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1709     }
1710     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1711   }
1712   if (UseSSE42Intrinsics) {
1713     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1714       UseVectorizedMismatchIntrinsic = true;
1715     }
1716   } else if (UseVectorizedMismatchIntrinsic) {
1717     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1718       warning("vectorizedMismatch intrinsics are not available on this CPU");
1719     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1720   }
1721   if (UseAVX >= 2) {
1722     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1723   } else if (UseVectorizedHashCodeIntrinsic) {
1724     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1725       warning("vectorizedHashCode intrinsics are not available on this CPU");
1726     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1727   }
1728 
  // Use the count leading zeros (lzcnt) instruction if available.
1730   if (supports_lzcnt()) {
1731     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1732       UseCountLeadingZerosInstruction = true;
1733     }
1734    } else if (UseCountLeadingZerosInstruction) {
1735     warning("lzcnt instruction is not available on this CPU");
1736     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1737   }
1738 
1739   // Use count trailing zeros instruction if available
1740   if (supports_bmi1()) {
1741     // tzcnt does not require VEX prefix
1742     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1743       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1744         // Don't use tzcnt if BMI1 is switched off on command line.
1745         UseCountTrailingZerosInstruction = false;
1746       } else {
1747         UseCountTrailingZerosInstruction = true;
1748       }
1749     }
1750   } else if (UseCountTrailingZerosInstruction) {
1751     warning("tzcnt instruction is not available on this CPU");
1752     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1753   }
1754 
1755   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1756   // VEX prefix is generated only when AVX > 0.
1757   if (supports_bmi1() && supports_avx()) {
1758     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1759       UseBMI1Instructions = true;
1760     }
1761   } else if (UseBMI1Instructions) {
1762     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1763     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1764   }
1765 
1766   if (supports_bmi2() && supports_avx()) {
1767     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1768       UseBMI2Instructions = true;
1769     }
1770   } else if (UseBMI2Instructions) {
1771     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1772     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1773   }
1774 
1775   // Use population count instruction if available.
1776   if (supports_popcnt()) {
1777     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1778       UsePopCountInstruction = true;
1779     }
1780   } else if (UsePopCountInstruction) {
1781     warning("POPCNT instruction is not available on this CPU");
1782     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1783   }
1784 
1785   // Use fast-string operations if available.
1786   if (supports_erms()) {
1787     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1788       UseFastStosb = true;
1789     }
1790   } else if (UseFastStosb) {
1791     warning("fast-string operations are not available on this CPU");
1792     FLAG_SET_DEFAULT(UseFastStosb, false);
1793   }
1794 
1795   // For AMD Processors use XMM/YMM MOVDQU instructions
1796   // for Object Initialization as default
1797   if (is_amd() && cpu_family() >= 0x19) {
1798     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1799       UseFastStosb = false;
1800     }
1801   }
1802 
1803 #ifdef COMPILER2
1804   if (is_intel() && MaxVectorSize > 16) {
1805     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1806       UseFastStosb = false;
1807     }
1808   }
1809 #endif
1810 
1811   // Use XMM/YMM MOVDQU instruction for Object Initialization
1812   if (UseUnalignedLoadStores) {
1813     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1814       UseXMMForObjInit = true;
1815     }
1816   } else if (UseXMMForObjInit) {
1817     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1818     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1819   }
1820 
1821 #ifdef COMPILER2
1822   if (FLAG_IS_DEFAULT(AlignVector)) {
1823     // Modern processors allow misaligned memory operations for vectors.
1824     AlignVector = !UseUnalignedLoadStores;
1825   }
1826 #endif // COMPILER2
1827 
1828   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1829     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1830       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1831     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1832       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1833     }
1834   }
1835 
1836   // Allocation prefetch settings
1837   int cache_line_size = checked_cast<int>(prefetch_data_size());
1838   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1839       (cache_line_size > AllocatePrefetchStepSize)) {
1840     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1841   }
1842 
1843   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1844     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1845     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1846       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1847     }
1848     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1849   }
1850 
1851   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1852     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1853     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1854   }
1855 
1856   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1857     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1858         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1859       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1860     }
1861 #ifdef COMPILER2
1862     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1863       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1864     }
1865 #endif
1866   }
1867 
1868   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1869 #ifdef COMPILER2
1870     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1871       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1872     }
1873 #endif
1874   }
1875 
1876   // Prefetch settings
1877 
1878   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1879   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1880   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1881   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1882 
1883   // gc copy/scan is disabled if prefetchw isn't supported, because
1884   // Prefetch::write emits an inlined prefetchw on Linux.
1885   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1886   // The used prefetcht0 instruction works for both amd64 and em64t.
1887 
1888   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1889     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1890   }
1891   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1892     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1893   }
1894 
1895   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1896      (cache_line_size > ContendedPaddingWidth))
1897      ContendedPaddingWidth = cache_line_size;
1898 
1899   // This machine allows unaligned memory accesses
1900   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1901     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1902   }
1903 
1904 #ifndef PRODUCT
1905   if (log_is_enabled(Info, os, cpu)) {
1906     LogStream ls(Log(os, cpu)::info());
1907     outputStream* log = &ls;
1908     log->print_cr("Logical CPUs per core: %u",
1909                   logical_processors_per_package());
1910     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1911     log->print("UseSSE=%d", UseSSE);
1912     if (UseAVX > 0) {
1913       log->print("  UseAVX=%d", UseAVX);
1914     }
1915     if (UseAES) {
1916       log->print("  UseAES=1");
1917     }
1918 #ifdef COMPILER2
1919     if (MaxVectorSize > 0) {
1920       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1921     }
1922 #endif
1923     log->cr();
1924     log->print("Allocation");
1925     if (AllocatePrefetchStyle <= 0) {
1926       log->print_cr(": no prefetching");
1927     } else {
1928       log->print(" prefetching: ");
1929       if (AllocatePrefetchInstr == 0) {
1930         log->print("PREFETCHNTA");
1931       } else if (AllocatePrefetchInstr == 1) {
1932         log->print("PREFETCHT0");
1933       } else if (AllocatePrefetchInstr == 2) {
1934         log->print("PREFETCHT2");
1935       } else if (AllocatePrefetchInstr == 3) {
1936         log->print("PREFETCHW");
1937       }
1938       if (AllocatePrefetchLines > 1) {
1939         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1940       } else {
1941         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1942       }
1943     }
1944 
1945     if (PrefetchCopyIntervalInBytes > 0) {
1946       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1947     }
1948     if (PrefetchScanIntervalInBytes > 0) {
1949       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1950     }
1951     if (ContendedPaddingWidth > 0) {
1952       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1953     }
1954   }
1955 #endif // !PRODUCT
1956   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1957       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1958   }
1959   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1960       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1961   }
1962 }
1963 
1964 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1965   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1966   if (vrt == XenHVM) {
1967     st->print_cr("Xen hardware-assisted virtualization detected");
1968   } else if (vrt == KVM) {
1969     st->print_cr("KVM virtualization detected");
1970   } else if (vrt == VMWare) {
1971     st->print_cr("VMWare virtualization detected");
1972     VirtualizationSupport::print_virtualization_info(st);
1973   } else if (vrt == HyperV) {
1974     st->print_cr("Hyper-V virtualization detected");
1975   } else if (vrt == HyperVRole) {
1976     st->print_cr("Hyper-V role detected");
1977   }
1978 }
1979 
1980 bool VM_Version::compute_has_intel_jcc_erratum() {
1981   if (!is_intel_family_core()) {
1982     // Only Intel CPUs are affected.
1983     return false;
1984   }
1985   // The following table of affected CPUs is based on the following document released by Intel:
1986   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1987   switch (_model) {
1988   case 0x8E:
1989     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1990     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1991     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1992     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1993     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1994     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1995     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1996     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1997     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1998     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1999   case 0x4E:
2000     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
2001     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
2002     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
2003     return _stepping == 0x3;
2004   case 0x55:
2005     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2006     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2007     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2008     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2009     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2010     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2011     return _stepping == 0x4 || _stepping == 0x7;
2012   case 0x5E:
2013     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2014     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2015     return _stepping == 0x3;
2016   case 0x9E:
2017     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2018     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2019     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2020     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2021     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2022     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2023     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2024     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2025     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2026     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2027     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2028     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2029     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2030     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2031     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2032   case 0xA5:
2033     // Not in Intel documentation.
2034     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2035     return true;
2036   case 0xA6:
2037     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2038     return _stepping == 0x0;
2039   case 0xAE:
2040     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2041     return _stepping == 0xA;
2042   default:
2043     // If we are running on another intel machine not recognized in the table, we are okay.
2044     return false;
2045   }
2046 }
2047 
2048 // On Xen, the cpuid instruction returns
2049 //  eax / registers[0]: Version of Xen
2050 //  ebx / registers[1]: chars 'XenV'
2051 //  ecx / registers[2]: chars 'MMXe'
2052 //  edx / registers[3]: chars 'nVMM'
2053 //
2054 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2055 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2056 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2057 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2058 //
// More information:
2060 // https://kb.vmware.com/s/article/1009458
2061 //
2062 void VM_Version::check_virtualizations() {
2063   uint32_t registers[4] = {0};
2064   char signature[13] = {0};
2065 
2066   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2067   // from 0x40000000 until 0x40010000.
2068   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2069   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2070     detect_virt_stub(leaf, registers);
2071     memcpy(signature, &registers[1], 12);
2072 
2073     if (strncmp("VMwareVMware", signature, 12) == 0) {
2074       Abstract_VM_Version::_detected_virtualization = VMWare;
2075       // check for extended metrics from guestlib
2076       VirtualizationSupport::initialize();
2077     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2078       Abstract_VM_Version::_detected_virtualization = HyperV;
2079 #ifdef _WINDOWS
2080       // CPUID leaf 0x40000007 is available to the root partition only.
2081       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2082       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2083       detect_virt_stub(0x40000007, registers);
2084       if ((registers[0] != 0x0) ||
2085           (registers[1] != 0x0) ||
2086           (registers[2] != 0x0) ||
2087           (registers[3] != 0x0)) {
2088         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2089       }
2090 #endif
2091     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2092       Abstract_VM_Version::_detected_virtualization = KVM;
2093     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2094       Abstract_VM_Version::_detected_virtualization = XenHVM;
2095     }
2096   }
2097 }
2098 
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  // Both UseAVX and MaxVectorSize must still be at their default values.
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  // The default AVX level must exceed AVX2 on an actual Cascade Lake part.
  return UseAVX > 2 && is_intel_cascade_lake();
}
#endif
2108 
2109 bool VM_Version::is_intel_cascade_lake() {
2110   return is_intel_skylake() && _stepping >= 5;
2111 }
2112 
2113 bool VM_Version::is_intel_darkmont() {
2114   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2115 }
2116 
2117 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2118 // for implementing the array copy and clear operations.
2119 // The Intel platforms that supports the serialize instruction
2120 // has improved implementation of 64-byte load/stores and so the default
2121 // threshold is set to 0 for these platforms.
2122 int VM_Version::avx3_threshold() {
2123   return (is_intel_server_family() &&
2124           supports_serialize() &&
2125           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2126 }
2127 
// Runs the generated stub (see initialize()) that clears the state left
// behind by the APX feature probe.
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2131 
// Set to true once initialize() has completed.
static bool _vm_version_initialized = false;

void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate the stubs used below: cpuid querying, virtualization
  // detection, APX test-state clearing and brand-string retrieval.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                     g.generate_getCPUIDBrandString());
  // Query the CPU and derive all VM feature settings from the results.
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2162 
// x86 cpu family ids as reported by cpuid; used below to pick
// human-readable processor names.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
2172 
// Selected feature bits from the extended-function cpuid EDX word.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2177 
// Feature bits reported in the cpuid EDX feature word, one bit mask per
// feature (names correspond to the _feature_edx_id strings below).
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2208 
// VM_Version statics

// Sizes of the family-name lookup tables below.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;                  // 12 vendor-string chars + NUL
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1); // presumably 3 leaves x 4 regs x 4 bytes + NUL — TODO confirm
static char* _cpu_brand_string = nullptr;          // lazily-populated brand string; nullptr until built
static int64_t _max_qualified_cpu_frequency = 0;   // cached value; 0 means not yet determined

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2222 
// Marketing names for Intel cpu families, indexed by family id (0-15).
// Entries left as "" have no distinct family name.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2241 
// Marketing names for AMD cpu families, indexed by family id (0-23);
// the final entry (index 0x17) is Zen. "" means no distinct name.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Model names within the Pentium Pro family (family 6), indexed by model id;
// "" means no distinct name, nullptr terminates the table.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2344 
/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
// Indexed by the legacy brand id; nullptr terminates the table.
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2359 
2360 
// Human-readable names for the cpuid EDX feature bits, one entry per bit
// position (masks defined in FeatureEdxFlag above); "" = reserved bit.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2395 
// Human-readable names for CPUID.80000001H:EDX feature bits, indexed by
// bit position; only the bits reported in the support string are named.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",         // bit 11
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",    // bit 20 (NX/XD)
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",                 // bit 27
  "",
  "Intel 64 Architecture",  // bit 29 (long mode)
  "",
  ""
};
2430 
// Human-readable names for CPUID.1:ECX feature bits, indexed by bit
// position. Empty strings mark bits that are not reported.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",                // bit  0 (SSE3)
  "PCLMULQDQ",                                  // bit  1
  "64-bit DS Area",                             // bit  2 (DTES64)
  "MONITOR/MWAIT instructions",                 // bit  3
  "CPL Qualified Debug Store",                  // bit  4 (DS-CPL)
  "Virtual Machine Extensions",                 // bit  5 (VMX)
  "Safer Mode Extensions",                      // bit  6 (SMX)
  "Enhanced Intel SpeedStep technology",        // bit  7 (EIST)
  "Thermal Monitor 2",                          // bit  8 (TM2)
  "Supplemental Streaming SIMD Extensions 3",   // bit  9 (SSSE3)
  "L1 Context ID",                              // bit 10 (CNXT-ID)
  "",
  "Fused Multiply-Add",                         // bit 12 (FMA)
  "CMPXCHG16B",                                 // bit 13
  "xTPR Update Control",                        // bit 14
  "Perfmon and Debug Capability",               // bit 15 (PDCM)
  "",
  "Process-context identifiers",                // bit 17 (PCID)
  "Direct Cache Access",                        // bit 18 (DCA)
  "Streaming SIMD extensions 4.1",              // bit 19 (SSE4.1)
  "Streaming SIMD extensions 4.2",              // bit 20 (SSE4.2)
  "x2APIC",                                     // bit 21
  "MOVBE",                                      // bit 22
  "Popcount instruction",                       // bit 23 (POPCNT)
  "TSC-Deadline",                               // bit 24
  "AESNI",                                      // bit 25
  "XSAVE",                                      // bit 26
  "OSXSAVE",                                    // bit 27
  "AVX",                                        // bit 28
  "F16C",                                       // bit 29
  "RDRAND",                                     // bit 30
  ""
};
2465 
// Human-readable names for CPUID.80000001H:ECX feature bits, indexed by
// bit position; only the bits reported in the support string are named.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",             // bit 0
  "Core multi-processor legacy mode",          // bit 1 (CmpLegacy)
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",         // bit 5 (ABM)
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",   // bit 6
  "Misaligned SSE mode",                       // bit 7
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2500 
2501 const char* VM_Version::cpu_model_description(void) {
2502   uint32_t cpu_family = extended_cpu_family();
2503   uint32_t cpu_model = extended_cpu_model();
2504   const char* model = nullptr;
2505 
2506   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2507     for (uint32_t i = 0; i <= cpu_model; i++) {
2508       model = _model_id_pentium_pro[i];
2509       if (model == nullptr) {
2510         break;
2511       }
2512     }
2513   }
2514   return model;
2515 }
2516 
2517 const char* VM_Version::cpu_brand_string(void) {
2518   if (_cpu_brand_string == nullptr) {
2519     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2520     if (nullptr == _cpu_brand_string) {
2521       return nullptr;
2522     }
2523     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2524     if (ret_val != OS_OK) {
2525       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2526       _cpu_brand_string = nullptr;
2527     }
2528   }
2529   return _cpu_brand_string;
2530 }
2531 
2532 const char* VM_Version::cpu_brand(void) {
2533   const char*  brand  = nullptr;
2534 
2535   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2536     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2537     brand = _brand_id[0];
2538     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2539       brand = _brand_id[i];
2540     }
2541   }
2542   return brand;
2543 }
2544 
2545 bool VM_Version::cpu_is_em64t(void) {
2546   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2547 }
2548 
2549 bool VM_Version::is_netburst(void) {
2550   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2551 }
2552 
2553 bool VM_Version::supports_tscinv_ext(void) {
2554   if (!supports_tscinv_bit()) {
2555     return false;
2556   }
2557 
2558   if (is_intel()) {
2559     return true;
2560   }
2561 
2562   if (is_amd()) {
2563     return !is_amd_Barcelona();
2564   }
2565 
2566   if (is_hygon()) {
2567     return true;
2568   }
2569 
2570   return false;
2571 }
2572 
2573 void VM_Version::resolve_cpu_information_details(void) {
2574 
2575   // in future we want to base this information on proper cpu
2576   // and cache topology enumeration such as:
2577   // Intel 64 Architecture Processor Topology Enumeration
2578   // which supports system cpu and cache topology enumeration
2579   // either using 2xAPICIDs or initial APICIDs
2580 
2581   // currently only rough cpu information estimates
2582   // which will not necessarily reflect the exact configuration of the system
2583 
2584   // this is the number of logical hardware threads
2585   // visible to the operating system
2586   _no_of_threads = os::processor_count();
2587 
2588   // find out number of threads per cpu package
2589   int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2590   if (threads_per_package == 0) {
2591     // Fallback code to avoid div by zero in subsequent code.
2592     // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2593     threads_per_package = threads_per_core() * cores_per_cpu();
2594   }
2595 
2596   // use amount of threads visible to the process in order to guess number of sockets
2597   _no_of_sockets = _no_of_threads / threads_per_package;
2598 
2599   // process might only see a subset of the total number of threads
2600   // from a single processor package. Virtualization/resource management for example.
2601   // If so then just write a hard 1 as num of pkgs.
2602   if (0 == _no_of_sockets) {
2603     _no_of_sockets = 1;
2604   }
2605 
2606   // estimate the number of cores
2607   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2608 }
2609 
2610 
2611 const char* VM_Version::cpu_family_description(void) {
2612   int cpu_family_id = extended_cpu_family();
2613   if (is_amd()) {
2614     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2615       return _family_id_amd[cpu_family_id];
2616     }
2617   }
2618   if (is_intel()) {
2619     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2620       return cpu_model_description();
2621     }
2622     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2623       return _family_id_intel[cpu_family_id];
2624     }
2625   }
2626   if (is_hygon()) {
2627     return "Dhyana";
2628   }
2629   return "Unknown x86";
2630 }
2631 
2632 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2633   assert(buf != nullptr, "buffer is null!");
2634   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2635 
2636   const char* cpu_type = nullptr;
2637   const char* x64 = nullptr;
2638 
2639   if (is_intel()) {
2640     cpu_type = "Intel";
2641     x64 = cpu_is_em64t() ? " Intel64" : "";
2642   } else if (is_amd()) {
2643     cpu_type = "AMD";
2644     x64 = cpu_is_em64t() ? " AMD64" : "";
2645   } else if (is_hygon()) {
2646     cpu_type = "Hygon";
2647     x64 = cpu_is_em64t() ? " AMD64" : "";
2648   } else {
2649     cpu_type = "Unknown x86";
2650     x64 = cpu_is_em64t() ? " x86_64" : "";
2651   }
2652 
2653   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2654     cpu_type,
2655     cpu_family_description(),
2656     supports_ht() ? " (HT)" : "",
2657     supports_sse3() ? " SSE3" : "",
2658     supports_ssse3() ? " SSSE3" : "",
2659     supports_sse4_1() ? " SSE4.1" : "",
2660     supports_sse4_2() ? " SSE4.2" : "",
2661     supports_sse4a() ? " SSE4A" : "",
2662     is_netburst() ? " Netburst" : "",
2663     is_intel_family_core() ? " Core" : "",
2664     x64);
2665 
2666   return OS_OK;
2667 }
2668 
// Fetch the 48-byte extended brand string (CPUID leaves 0x80000002..4)
// into buf. buf must hold at least CPU_EBS_MAX_LENGTH bytes.
// Always returns OS_OK.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer, 4 bytes (one CPUID register) at a time.
  // NOTE(review): these uint32_t stores type-pun through a char buffer
  // and assume buf is at least 4-byte aligned -- presumably true for the
  // C-heap buffer allocated by cpu_brand_string(); memcpy would be the
  // strictly conforming alternative. Confirm alignment of any new caller.
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}
2693 
// Append a comma-separated list of supported feature names (from the
// _feature_*_id tables above) to buf. Returns the number of characters
// written; on formatting failure returns buf_len - 1.
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

// Append one feature name, preceded by ", " for every entry after the
// first. Bails out with a full buffer on a jio_snprintf error.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // CPUID.1:EDX feature bits. Note: the loop scans bits 0..29 only
  // (flag stops at 0x20000000), so bit 31 table entries are not emitted.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID.1:ECX feature bits (bits 0..29).
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // CPUID.80000001H:ECX feature bits (bits 0..29).
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // CPUID.80000001H:EDX feature bits (bits 0..29).
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Features not covered by the bit tables above.
  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  if (supports_hybrid()) {
      WRITE_TO_BUF("Hybrid Architecture");
  }

  return written;
}
2754 
2755 /**
2756  * Write a detailed description of the cpu to a given buffer, including
2757  * feature set.
2758  */
2759 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2760   assert(buf != nullptr, "buffer is null!");
2761   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2762 
2763   static const char* unknown = "<unknown>";
2764   char               vendor_id[VENDOR_LENGTH];
2765   const char*        family = nullptr;
2766   const char*        model = nullptr;
2767   const char*        brand = nullptr;
2768   int                outputLen = 0;
2769 
2770   family = cpu_family_description();
2771   if (family == nullptr) {
2772     family = unknown;
2773   }
2774 
2775   model = cpu_model_description();
2776   if (model == nullptr) {
2777     model = unknown;
2778   }
2779 
2780   brand = cpu_brand_string();
2781 
2782   if (brand == nullptr) {
2783     brand = cpu_brand();
2784     if (brand == nullptr) {
2785       brand = unknown;
2786     }
2787   }
2788 
2789   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2790   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2791   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2792   vendor_id[VENDOR_LENGTH-1] = '\0';
2793 
2794   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2795     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2796     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2797     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2798     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2799     "Supports: ",
2800     brand,
2801     vendor_id,
2802     family,
2803     extended_cpu_family(),
2804     model,
2805     extended_cpu_model(),
2806     cpu_stepping(),
2807     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2808     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2809     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2810     _cpuid_info.std_cpuid1_eax.value,
2811     _cpuid_info.std_cpuid1_ebx.value,
2812     _cpuid_info.std_cpuid1_ecx.value,
2813     _cpuid_info.std_cpuid1_edx.value,
2814     _cpuid_info.ext_cpuid1_eax,
2815     _cpuid_info.ext_cpuid1_ebx,
2816     _cpuid_info.ext_cpuid1_ecx,
2817     _cpuid_info.ext_cpuid1_edx);
2818 
2819   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2820     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2821     return OS_ERR;
2822   }
2823 
2824   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2825 
2826   return OS_OK;
2827 }
2828 
2829 
2830 // Fill in Abstract_VM_Version statics
2831 void VM_Version::initialize_cpu_information() {
2832   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2833   assert(!_initialized, "shouldn't be initialized yet");
2834   resolve_cpu_information_details();
2835 
2836   // initialize cpu_name and cpu_desc
2837   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2838   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2839   _initialized = true;
2840 }
2841 
2842 /**
2843  *  For information about extracting the frequency from the cpu brand string, please see:
2844  *
2845  *    Intel Processor Identification and the CPUID Instruction
2846  *    Application Note 485
2847  *    May 2012
2848  *
2849  * The return value is the frequency in Hz.
2850  */
2851 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2852   const char* const brand_string = cpu_brand_string();
2853   if (brand_string == nullptr) {
2854     return 0;
2855   }
2856   const int64_t MEGA = 1000000;
2857   int64_t multiplier = 0;
2858   int64_t frequency = 0;
2859   uint8_t idx = 0;
2860   // The brand string buffer is at most 48 bytes.
2861   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2862   for (; idx < 48-2; ++idx) {
2863     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2864     // Search brand string for "yHz" where y is M, G, or T.
2865     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2866       if (brand_string[idx] == 'M') {
2867         multiplier = MEGA;
2868       } else if (brand_string[idx] == 'G') {
2869         multiplier = MEGA * 1000;
2870       } else if (brand_string[idx] == 'T') {
2871         multiplier = MEGA * MEGA;
2872       }
2873       break;
2874     }
2875   }
2876   if (multiplier > 0) {
2877     // Compute frequency (in Hz) from brand string.
2878     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2879       frequency =  (brand_string[idx-4] - '0') * multiplier;
2880       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2881       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2882     } else { // format is "xxxx"
2883       frequency =  (brand_string[idx-4] - '0') * 1000;
2884       frequency += (brand_string[idx-3] - '0') * 100;
2885       frequency += (brand_string[idx-2] - '0') * 10;
2886       frequency += (brand_string[idx-1] - '0');
2887       frequency *= multiplier;
2888     }
2889   }
2890   return frequency;
2891 }
2892 
2893 
2894 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2895   if (_max_qualified_cpu_frequency == 0) {
2896     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2897   }
2898   return _max_qualified_cpu_frequency;
2899 }
2900 
2901 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2902   VM_Features vm_features;
2903   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2904     vm_features.set_feature(CPU_CX8);
2905   if (std_cpuid1_edx.bits.cmov != 0)
2906     vm_features.set_feature(CPU_CMOV);
2907   if (std_cpuid1_edx.bits.clflush != 0)
2908     vm_features.set_feature(CPU_FLUSH);
2909   // clflush should always be available on x86_64
2910   // if not we are in real trouble because we rely on it
2911   // to flush the code cache.
2912   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2913   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2914       ext_cpuid1_edx.bits.fxsr != 0))
2915     vm_features.set_feature(CPU_FXSR);
2916   // HT flag is set for multi-core processors also.
2917   if (threads_per_core() > 1)
2918     vm_features.set_feature(CPU_HT);
2919   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2920       ext_cpuid1_edx.bits.mmx != 0))
2921     vm_features.set_feature(CPU_MMX);
2922   if (std_cpuid1_edx.bits.sse != 0)
2923     vm_features.set_feature(CPU_SSE);
2924   if (std_cpuid1_edx.bits.sse2 != 0)
2925     vm_features.set_feature(CPU_SSE2);
2926   if (std_cpuid1_ecx.bits.sse3 != 0)
2927     vm_features.set_feature(CPU_SSE3);
2928   if (std_cpuid1_ecx.bits.ssse3 != 0)
2929     vm_features.set_feature(CPU_SSSE3);
2930   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2931     vm_features.set_feature(CPU_SSE4_1);
2932   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2933     vm_features.set_feature(CPU_SSE4_2);
2934   if (std_cpuid1_ecx.bits.popcnt != 0)
2935     vm_features.set_feature(CPU_POPCNT);
2936   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2937       xem_xcr0_eax.bits.apx_f != 0 &&
2938       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2939     vm_features.set_feature(CPU_APX_F);
2940   }
2941   if (std_cpuid1_ecx.bits.avx != 0 &&
2942       std_cpuid1_ecx.bits.osxsave != 0 &&
2943       xem_xcr0_eax.bits.sse != 0 &&
2944       xem_xcr0_eax.bits.ymm != 0) {
2945     vm_features.set_feature(CPU_AVX);
2946     vm_features.set_feature(CPU_VZEROUPPER);
2947     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2948       vm_features.set_feature(CPU_SHA512);
2949     if (std_cpuid1_ecx.bits.f16c != 0)
2950       vm_features.set_feature(CPU_F16C);
2951     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2952       vm_features.set_feature(CPU_AVX2);
2953       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2954         vm_features.set_feature(CPU_AVX_IFMA);
2955     }
2956     if (sef_cpuid7_ecx.bits.gfni != 0)
2957         vm_features.set_feature(CPU_GFNI);
2958     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2959         xem_xcr0_eax.bits.opmask != 0 &&
2960         xem_xcr0_eax.bits.zmm512 != 0 &&
2961         xem_xcr0_eax.bits.zmm32 != 0) {
2962       vm_features.set_feature(CPU_AVX512F);
2963       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2964         vm_features.set_feature(CPU_AVX512CD);
2965       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2966         vm_features.set_feature(CPU_AVX512DQ);
2967       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2968         vm_features.set_feature(CPU_AVX512_IFMA);
2969       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2970         vm_features.set_feature(CPU_AVX512PF);
2971       if (sef_cpuid7_ebx.bits.avx512er != 0)
2972         vm_features.set_feature(CPU_AVX512ER);
2973       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2974         vm_features.set_feature(CPU_AVX512BW);
2975       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2976         vm_features.set_feature(CPU_AVX512VL);
2977       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2978         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2979       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2980         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2981       if (sef_cpuid7_ecx.bits.vaes != 0)
2982         vm_features.set_feature(CPU_AVX512_VAES);
2983       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2984         vm_features.set_feature(CPU_AVX512_VNNI);
2985       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2986         vm_features.set_feature(CPU_AVX512_BITALG);
2987       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2988         vm_features.set_feature(CPU_AVX512_VBMI);
2989       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2990         vm_features.set_feature(CPU_AVX512_VBMI2);
2991     }
2992     if (is_intel()) {
2993       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2994           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2995           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2996           xem_xcr0_eax.bits.opmask != 0 &&
2997           xem_xcr0_eax.bits.zmm512 != 0 &&
2998           xem_xcr0_eax.bits.zmm32 != 0) {
2999         vm_features.set_feature(CPU_AVX10_1);
3000         vm_features.set_feature(CPU_AVX512F);
3001         vm_features.set_feature(CPU_AVX512CD);
3002         vm_features.set_feature(CPU_AVX512DQ);
3003         vm_features.set_feature(CPU_AVX512PF);
3004         vm_features.set_feature(CPU_AVX512ER);
3005         vm_features.set_feature(CPU_AVX512BW);
3006         vm_features.set_feature(CPU_AVX512VL);
3007         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3008         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3009         vm_features.set_feature(CPU_AVX512_VAES);
3010         vm_features.set_feature(CPU_AVX512_VNNI);
3011         vm_features.set_feature(CPU_AVX512_BITALG);
3012         vm_features.set_feature(CPU_AVX512_VBMI);
3013         vm_features.set_feature(CPU_AVX512_VBMI2);
3014         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3015           vm_features.set_feature(CPU_AVX10_2);
3016         }
3017       }
3018     }
3019   }
3020 
3021   if (std_cpuid1_ecx.bits.hv != 0)
3022     vm_features.set_feature(CPU_HV);
3023   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3024     vm_features.set_feature(CPU_BMI1);
3025   if (std_cpuid1_edx.bits.tsc != 0)
3026     vm_features.set_feature(CPU_TSC);
3027   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3028     vm_features.set_feature(CPU_TSCINV_BIT);
3029   if (std_cpuid1_ecx.bits.aes != 0)
3030     vm_features.set_feature(CPU_AES);
3031   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3032     vm_features.set_feature(CPU_LZCNT);
3033   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3034     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3035   if (sef_cpuid7_ebx.bits.erms != 0)
3036     vm_features.set_feature(CPU_ERMS);
3037   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3038     vm_features.set_feature(CPU_FSRM);
3039   if (std_cpuid1_ecx.bits.clmul != 0)
3040     vm_features.set_feature(CPU_CLMUL);
3041   if (sef_cpuid7_ebx.bits.rtm != 0)
3042     vm_features.set_feature(CPU_RTM);
3043   if (sef_cpuid7_ebx.bits.adx != 0)
3044      vm_features.set_feature(CPU_ADX);
3045   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3046     vm_features.set_feature(CPU_BMI2);
3047   if (sef_cpuid7_ebx.bits.sha != 0)
3048     vm_features.set_feature(CPU_SHA);
3049   if (std_cpuid1_ecx.bits.fma != 0)
3050     vm_features.set_feature(CPU_FMA);
3051   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3052     vm_features.set_feature(CPU_FLUSHOPT);
3053   if (sef_cpuid7_ebx.bits.clwb != 0)
3054     vm_features.set_feature(CPU_CLWB);
3055   if (ext_cpuid1_edx.bits.rdtscp != 0)
3056     vm_features.set_feature(CPU_RDTSCP);
3057   if (sef_cpuid7_ecx.bits.rdpid != 0)
3058     vm_features.set_feature(CPU_RDPID);
3059 
3060   // AMD|Hygon additional features.
3061   if (is_amd_family()) {
3062     // PREFETCHW was checked above, check TDNOW here.
3063     if ((ext_cpuid1_edx.bits.tdnow != 0))
3064       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3065     if (ext_cpuid1_ecx.bits.sse4a != 0)
3066       vm_features.set_feature(CPU_SSE4A);
3067   }
3068 
3069   // Intel additional features.
3070   if (is_intel()) {
3071     if (sef_cpuid7_edx.bits.serialize != 0)
3072       vm_features.set_feature(CPU_SERIALIZE);
3073     if (sef_cpuid7_edx.bits.hybrid != 0)
3074       vm_features.set_feature(CPU_HYBRID);
3075     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3076       vm_features.set_feature(CPU_AVX512_FP16);
3077   }
3078 
3079   // ZX additional features.
3080   if (is_zx()) {
3081     // We do not know if these are supported by ZX, so we cannot trust
3082     // common CPUID bit for them.
3083     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3084     vm_features.clear_feature(CPU_CLWB);
3085   }
3086 
3087   // Protection key features.
3088   if (sef_cpuid7_ecx.bits.pku != 0) {
3089     vm_features.set_feature(CPU_PKU);
3090   }
3091   if (sef_cpuid7_ecx.bits.ospke != 0) {
3092     vm_features.set_feature(CPU_OSPKE);
3093   }
3094 
3095   // Control flow enforcement (CET) features.
3096   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3097     vm_features.set_feature(CPU_CET_SS);
3098   }
3099   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3100     vm_features.set_feature(CPU_CET_IBT);
3101   }
3102 
3103   // Composite features.
3104   if (supports_tscinv_bit() &&
3105       ((is_amd_family() && !is_amd_Barcelona()) ||
3106        is_intel_tsc_synched_at_init())) {
3107     vm_features.set_feature(CPU_TSCINV);
3108   }
3109   return vm_features;
3110 }
3111 
3112 bool VM_Version::os_supports_avx_vectors() {
3113   bool retVal = false;
3114   int nreg = 4;
3115   if (supports_evex()) {
3116     // Verify that OS save/restore all bits of EVEX registers
3117     // during signal processing.
3118     retVal = true;
3119     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3120       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3121         retVal = false;
3122         break;
3123       }
3124     }
3125   } else if (supports_avx()) {
3126     // Verify that OS save/restore all bits of AVX registers
3127     // during signal processing.
3128     retVal = true;
3129     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3130       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3131         retVal = false;
3132         break;
3133       }
3134     }
3135     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3136     if (retVal == false) {
3137       // Verify that OS save/restore all bits of EVEX registers
3138       // during signal processing.
3139       retVal = true;
3140       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3141         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3142           retVal = false;
3143           break;
3144         }
3145       }
3146     }
3147   }
3148   return retVal;
3149 }
3150 
3151 bool VM_Version::os_supports_apx_egprs() {
3152   if (!supports_apx_f()) {
3153     return false;
3154   }
3155   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3156       _cpuid_info.apx_save[1] != egpr_test_value()) {
3157     return false;
3158   }
3159   return true;
3160 }
3161 
3162 uint VM_Version::cores_per_cpu() {
3163   uint result = 1;
3164   if (is_intel()) {
3165     bool supports_topology = supports_processor_topology();
3166     if (supports_topology) {
3167       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3168                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3169     }
3170     if (!supports_topology || result == 0) {
3171       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3172     }
3173   } else if (is_amd_family()) {
3174     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3175     if (cpu_family() >= 0x17) { // Zen or later
3176       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3177     }
3178   } else if (is_zx()) {
3179     bool supports_topology = supports_processor_topology();
3180     if (supports_topology) {
3181       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3182                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3183     }
3184     if (!supports_topology || result == 0) {
3185       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3186     }
3187   }
3188   return result;
3189 }
3190 
3191 uint VM_Version::threads_per_core() {
3192   uint result = 1;
3193   if (is_intel() && supports_processor_topology()) {
3194     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3195   } else if (is_zx() && supports_processor_topology()) {
3196     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3197   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3198     if (cpu_family() >= 0x17) {
3199       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3200     } else {
3201       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3202                  cores_per_cpu();
3203     }
3204   }
3205   return (result == 0 ? 1 : result);
3206 }
3207 
3208 uint VM_Version::L1_line_size() {
3209   uint result = 0;
3210   if (is_intel()) {
3211     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3212   } else if (is_amd_family()) {
3213     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3214   } else if (is_zx()) {
3215     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3216   }
3217   if (result < 32) // not defined ?
3218     result = 32;   // 32 bytes by default on x86 and other x64
3219   return result;
3220 }
3221 
3222 bool VM_Version::is_intel_tsc_synched_at_init() {
3223   if (is_intel_family_core()) {
3224     uint32_t ext_model = extended_cpu_model();
3225     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3226         ext_model == CPU_MODEL_WESTMERE_EP    ||
3227         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3228         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3229       // <= 2-socket invariant tsc support. EX versions are usually used
3230       // in > 2-socket systems and likely don't synchronize tscs at
3231       // initialization.
3232       // Code that uses tsc values must be prepared for them to arbitrarily
3233       // jump forward or backward.
3234       return true;
3235     }
3236   }
3237   return false;
3238 }
3239 
3240 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3241   // Hardware prefetching (distance/size in bytes):
3242   // Pentium 3 -  64 /  32
3243   // Pentium 4 - 256 / 128
3244   // Athlon    -  64 /  32 ????
3245   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3246   // Core      - 128 /  64
3247   //
3248   // Software prefetching (distance in bytes / instruction with best score):
3249   // Pentium 3 - 128 / prefetchnta
3250   // Pentium 4 - 512 / prefetchnta
3251   // Athlon    - 128 / prefetchnta
3252   // Opteron   - 256 / prefetchnta
3253   // Core      - 256 / prefetchnta
3254   // It will be used only when AllocatePrefetchStyle > 0
3255 
3256   if (is_amd_family()) { // AMD | Hygon
3257     if (supports_sse2()) {
3258       return 256; // Opteron
3259     } else {
3260       return 128; // Athlon
3261     }
3262   } else { // Intel
3263     if (supports_sse3() && is_intel_server_family()) {
3264       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3265         return 192;
3266       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3267         return 384;
3268       }
3269     }
3270     if (supports_sse2()) {
3271       if (is_intel_server_family()) {
3272         return 256; // Pentium M, Core, Core2
3273       } else {
3274         return 512; // Pentium 4
3275       }
3276     } else {
3277       return 128; // Pentium 3 (and all other old CPUs)
3278     }
3279   }
3280 }
3281 
3282 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3283   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3284   switch (id) {
3285   case vmIntrinsics::_floatToFloat16:
3286   case vmIntrinsics::_float16ToFloat:
3287     if (!supports_float16()) {
3288       return false;
3289     }
3290     break;
3291   default:
3292     break;
3293   }
3294   return true;
3295 }
3296 
3297 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3298   int i = 0;
3299   ss.join([&]() {
3300     const char* str = nullptr;
3301     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3302       if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3303         str = _features_names[i];
3304       }
3305       i += 1;
3306     }
3307     return str;
3308   }, ", ");
3309 }
3310 
3311 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3312   VM_Features* features = (VM_Features*)features_buffer;
3313   insert_features_names(*features, ss);
3314 }
3315 
3316 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3317   VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3318   VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3319   int i = 0;
3320   ss.join([&]() {
3321     const char* str = nullptr;
3322     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3323       Feature_Flag flag = (Feature_Flag)i;
3324       if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3325         str = _features_names[i];
3326       }
3327       i += 1;
3328     }
3329     return str;
3330   }, ", ");
3331 }
3332 
3333 int VM_Version::cpu_features_size() {
3334   return sizeof(VM_Features);
3335 }
3336 
3337 void VM_Version::store_cpu_features(void* buf) {
3338   VM_Features copy = _features;
3339   copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3340   memcpy(buf, &copy, sizeof(VM_Features));
3341 }
3342 
3343 bool VM_Version::supports_features(void* features_buffer) {
3344   VM_Features* features_to_test = (VM_Features*)features_buffer;
3345   return _features.supports_features(features_to_test);
3346 }