1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID/XCR0 data filled in by the generated get_cpu_info stub at startup.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into a parallel table of human-readable feature names.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code blob holding the generated CPU-probing stubs below.
static BufferBlob* stub_blob;
static const int stub_size = 2000;

// Size of the features bitmap in 64-bit words.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;      // effective features (may be trimmed by VM settings)
VM_Version::VM_Features VM_Version::_cpu_features;  // features as originally reported by the hardware

// C-callable entry points of the generated stubs.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  80 
  81 bool VM_Version::supports_clflush() {
  82   // clflush should always be available on x86_64
  83   // if not we are in real trouble because we rely on it
  84   // to flush the code cache.
  85   // Unfortunately, Assembler::clflush is currently called as part
  86   // of generation of the code cache flush routine. This happens
  87   // under Universe::init before the processor features are set
  88   // up. Assembler::flush calls this routine to check that clflush
  89   // is allowed. So, we give the caller a free pass if Universe init
  90   // is still in progress.
  91   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  92   return true;
  93 }
  94 
// CPUID leaf (input EAX) numbers used by the stub generators below.
// Standard leaves start at 0x0; extended leaves start at 0x80000000.
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
 107 
// Generates the startup-time assembly stubs that interrogate the processor:
// raw CPUID leaf / XCR0 collection, virtualization detection, the CPU brand
// string, and SEGV-based probes that verify the OS save/restores AVX/EVEX
// (and, when present, APX extended GPR) register state across signals.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Returns a stub that zeroes r16 and r31. It is invoked from the APX SEGV
  // probe's signal handling so that non-zero values observed afterwards must
  // have been restored by the OS, not merely left untouched.
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    // Temporarily enable APX features so the assembler accepts EGPR encodings.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Generates the stub that fills a VM_Version::CpuidInfo (passed in c_rarg0)
  // with raw CPUID leaf data, XCR0 contents, and the results of the signal
  // save/restore probes for YMM/ZMM and APX extended-GPR state.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    // EVEX probing is done when UseAVX is unset (default) or explicitly > 2.
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    // Note: reached by fall-through from the leaf 0x7 block above; the
    // sefsl1_cpuid, std_cpuid29 and std_cpuid24 labels are never jumped to.
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX probe: load known values into r16/r31, take a SEGV, then store them
    // for later comparison to verify the OS preserved EGPR state.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS saves sse|ymm state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore the xmm registers saved to the stack above (reverse order).
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore the xmm registers saved to the stack above (reverse order).
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits vzeroupper, except on Xeon Phi (Knights family) parts where the
  // model checks below bail out to L_wrapup instead.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Generates a stub that executes CPUID for the leaf passed in c_rarg0 and
  // stores eax/ebx/ecx/edx into the uint32_t[4] array passed in c_rarg1.
  // NOTE(review): rcx (sub-leaf) is not zeroed before cpuid; callers
  // presumably only use leaves where the sub-leaf is irrelevant -- confirm.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Generates a stub that reads the 48-byte processor brand string from
  // extended CPUID leaves 0x80000002..0x80000004 into the proc_name_* slots
  // of the VM_Version::CpuidInfo passed in c_rarg0. On pre-CPUID chips
  // (386/486) it stores nothing and simply returns.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 858 
 859 void VM_Version::get_processor_features() {
 860 
 861   _cpu = 4; // 486 by default
 862   _model = 0;
 863   _stepping = 0;
 864   _logical_processors_per_package = 1;
 865   // i486 internal cache is both I&D and has a 16-byte line size
 866   _L1_data_cache_line_size = 16;
 867 
 868   // Get raw processor info
 869 
 870   get_cpu_info_stub(&_cpuid_info);
 871 
 872   assert_is_initialized();
 873   _cpu = extended_cpu_family();
 874   _model = extended_cpu_model();
 875   _stepping = cpu_stepping();
 876 
 877   if (cpu_family() > 4) { // it supports CPUID
 878     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 879     _cpu_features = _features; // Preserve features
 880     // Logical processors are only available on P4s and above,
 881     // and only if hyperthreading is available.
 882     _logical_processors_per_package = logical_processor_count();
 883     _L1_data_cache_line_size = L1_line_size();
 884   }
 885 
 886   // xchg and xadd instructions
 887   _supports_atomic_getset4 = true;
 888   _supports_atomic_getadd4 = true;
 889   _supports_atomic_getset8 = true;
 890   _supports_atomic_getadd8 = true;
 891 
 892   // OS should support SSE for x64 and hardware should support at least SSE2.
 893   if (!VM_Version::supports_sse2()) {
 894     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 895   }
 896   // in 64 bit the use of SSE2 is the minimum
 897   if (UseSSE < 2) UseSSE = 2;
 898 
 899   // flush_icache_stub have to be generated first.
 900   // That is why Icache line size is hard coded in ICache class,
 901   // see icache_x86.hpp. It is also the reason why we can't use
 902   // clflush instruction in 32-bit VM since it could be running
 903   // on CPU which does not support it.
 904   //
 905   // The only thing we can do is to verify that flushed
 906   // ICache::line_size has correct value.
 907   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 908   // clflush_size is size in quadwords (8 bytes).
 909   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 910 
 911   // assigning this field effectively enables Unsafe.writebackMemory()
 912   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 913   // that is only implemented on x86_64 and only if the OS plays ball
 914   if (os::supports_map_sync()) {
 915     // publish data cache line flush size to generic field, otherwise
    // let it default to zero, thereby disabling writeback
 917     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 918   }
 919 
 920   // Check if processor has Intel Ecore
 921   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 922     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 923       _model == 0xCC || _model == 0xDD)) {
 924     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 925   }
 926 
 927   if (UseSSE < 4) {
 928     _features.clear_feature(CPU_SSE4_1);
 929     _features.clear_feature(CPU_SSE4_2);
 930   }
 931 
 932   if (UseSSE < 3) {
 933     _features.clear_feature(CPU_SSE3);
 934     _features.clear_feature(CPU_SSSE3);
 935     _features.clear_feature(CPU_SSE4A);
 936   }
 937 
 938   if (UseSSE < 2)
 939     _features.clear_feature(CPU_SSE2);
 940 
 941   if (UseSSE < 1)
 942     _features.clear_feature(CPU_SSE);
 943 
  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
 945   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 946     UseAVX = 0;
 947   }
 948 
 949   // UseSSE is set to the smaller of what hardware supports and what
 950   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 951   // older Pentiums which do not support it.
 952   int use_sse_limit = 0;
 953   if (UseSSE > 0) {
 954     if (UseSSE > 3 && supports_sse4_1()) {
 955       use_sse_limit = 4;
 956     } else if (UseSSE > 2 && supports_sse3()) {
 957       use_sse_limit = 3;
 958     } else if (UseSSE > 1 && supports_sse2()) {
 959       use_sse_limit = 2;
 960     } else if (UseSSE > 0 && supports_sse()) {
 961       use_sse_limit = 1;
 962     } else {
 963       use_sse_limit = 0;
 964     }
 965   }
 966   if (FLAG_IS_DEFAULT(UseSSE)) {
 967     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 968   } else if (UseSSE > use_sse_limit) {
 969     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 970     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 971   }
 972 
 973   // first try initial setting and detect what we can support
 974   int use_avx_limit = 0;
 975   if (UseAVX > 0) {
 976     if (UseSSE < 4) {
 977       // Don't use AVX if SSE is unavailable or has been disabled.
 978       use_avx_limit = 0;
 979     } else if (UseAVX > 2 && supports_evex()) {
 980       use_avx_limit = 3;
 981     } else if (UseAVX > 1 && supports_avx2()) {
 982       use_avx_limit = 2;
 983     } else if (UseAVX > 0 && supports_avx()) {
 984       use_avx_limit = 1;
 985     } else {
 986       use_avx_limit = 0;
 987     }
 988   }
 989   if (FLAG_IS_DEFAULT(UseAVX)) {
 990     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 991     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 992       FLAG_SET_DEFAULT(UseAVX, 2);
 993     } else {
 994       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 995     }
 996   }
 997 
 998   if (UseAVX > use_avx_limit) {
 999     if (UseSSE < 4) {
1000       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1001     } else {
1002       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1003     }
1004     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1005   }
1006 
1007   if (UseAVX < 3) {
1008     _features.clear_feature(CPU_AVX512F);
1009     _features.clear_feature(CPU_AVX512DQ);
1010     _features.clear_feature(CPU_AVX512CD);
1011     _features.clear_feature(CPU_AVX512BW);
1012     _features.clear_feature(CPU_AVX512ER);
1013     _features.clear_feature(CPU_AVX512PF);
1014     _features.clear_feature(CPU_AVX512VL);
1015     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1016     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1017     _features.clear_feature(CPU_AVX512_VAES);
1018     _features.clear_feature(CPU_AVX512_VNNI);
1019     _features.clear_feature(CPU_AVX512_VBMI);
1020     _features.clear_feature(CPU_AVX512_VBMI2);
1021     _features.clear_feature(CPU_AVX512_BITALG);
1022     _features.clear_feature(CPU_AVX512_IFMA);
1023     _features.clear_feature(CPU_APX_F);
1024     _features.clear_feature(CPU_AVX512_FP16);
1025     _features.clear_feature(CPU_AVX10_1);
1026     _features.clear_feature(CPU_AVX10_2);
1027   }
1028 
1029 
1030   if (UseAVX < 2) {
1031     _features.clear_feature(CPU_AVX2);
1032     _features.clear_feature(CPU_AVX_IFMA);
1033   }
1034 
1035   if (UseAVX < 1) {
1036     _features.clear_feature(CPU_AVX);
1037     _features.clear_feature(CPU_VZEROUPPER);
1038     _features.clear_feature(CPU_F16C);
1039     _features.clear_feature(CPU_SHA512);
1040   }
1041 
1042   if (logical_processors_per_package() == 1) {
1043     // HT processor could be installed on a system which doesn't support HT.
1044     _features.clear_feature(CPU_HT);
1045   }
1046 
1047   if (is_intel()) { // Intel cpus specific settings
1048     if (is_knights_family()) {
1049       _features.clear_feature(CPU_VZEROUPPER);
1050       _features.clear_feature(CPU_AVX512BW);
1051       _features.clear_feature(CPU_AVX512VL);
1052       _features.clear_feature(CPU_APX_F);
1053       _features.clear_feature(CPU_AVX512DQ);
1054       _features.clear_feature(CPU_AVX512_VNNI);
1055       _features.clear_feature(CPU_AVX512_VAES);
1056       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1057       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1058       _features.clear_feature(CPU_AVX512_VBMI);
1059       _features.clear_feature(CPU_AVX512_VBMI2);
1060       _features.clear_feature(CPU_CLWB);
1061       _features.clear_feature(CPU_FLUSHOPT);
1062       _features.clear_feature(CPU_GFNI);
1063       _features.clear_feature(CPU_AVX512_BITALG);
1064       _features.clear_feature(CPU_AVX512_IFMA);
1065       _features.clear_feature(CPU_AVX_IFMA);
1066       _features.clear_feature(CPU_AVX512_FP16);
1067       _features.clear_feature(CPU_AVX10_1);
1068       _features.clear_feature(CPU_AVX10_2);
1069     }
1070   }
1071 
1072     // Currently APX support is only enabled for targets supporting AVX512VL feature.
1073   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1074   if (UseAPX && !apx_supported) {
1075     warning("UseAPX is not supported on this CPU, setting it to false");
1076     FLAG_SET_DEFAULT(UseAPX, false);
1077   }
1078 
1079   if (!UseAPX) {
1080     _features.clear_feature(CPU_APX_F);
1081   }
1082 
1083   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1084     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1085     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1086   } else {
1087     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1088   }
1089 
1090   assert(supports_clflush(), "Always present");
1091   if (X86ICacheSync == -1) {
1092     // Auto-detect, choosing the best performant one that still flushes
1093     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1094     if (supports_clwb()) {
1095       FLAG_SET_ERGO(X86ICacheSync, 3);
1096     } else if (supports_clflushopt()) {
1097       FLAG_SET_ERGO(X86ICacheSync, 2);
1098     } else {
1099       FLAG_SET_ERGO(X86ICacheSync, 1);
1100     }
1101   } else {
1102     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1103       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1104     }
1105     if ((X86ICacheSync == 3) && !supports_clwb()) {
1106       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1107     }
1108     if ((X86ICacheSync == 5) && !supports_serialize()) {
1109       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1110     }
1111   }
1112 
1113   stringStream ss(2048);
1114   if (supports_hybrid()) {
1115     ss.print("(hybrid)");
1116   } else {
1117     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1118   }
1119   ss.print(" family %d model %d stepping %d microcode 0x%x",
1120            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1121   ss.print(", ");
1122   int features_offset = (int)ss.size();
1123   insert_features_names(_features, ss);
1124 
1125   _cpu_info_string = ss.as_string(true);
1126   _features_string = _cpu_info_string + features_offset;
1127 
1128   // Use AES instructions if available.
1129   if (supports_aes()) {
1130     if (FLAG_IS_DEFAULT(UseAES)) {
1131       FLAG_SET_DEFAULT(UseAES, true);
1132     }
1133     if (!UseAES) {
1134       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1135         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1136       }
1137       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1138     } else {
1139       if (UseSSE > 2) {
1140         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1141           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1142         }
1143       } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
1146         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1147           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1148         }
1149         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1150       }
1151 
1152       // --AES-CTR begins--
1153       if (!UseAESIntrinsics) {
1154         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1155           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1156           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1157         }
1158       } else {
1159         if (supports_sse4_1()) {
1160           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1161             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1162           }
1163         } else {
           // The AES-CTR intrinsic stubs require AES instruction support (of course)
           // but also require SSE4.1 mode or higher for the instructions they use.
1166           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1167              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1168            }
1169            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1170         }
1171       }
1172       // --AES-CTR ends--
1173     }
1174   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1175     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1176       warning("AES instructions are not available on this CPU");
1177       FLAG_SET_DEFAULT(UseAES, false);
1178     }
1179     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1180       warning("AES intrinsics are not available on this CPU");
1181       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1182     }
1183     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1184       warning("AES-CTR intrinsics are not available on this CPU");
1185       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1186     }
1187   }
1188 
1189   // Use CLMUL instructions if available.
1190   if (supports_clmul()) {
1191     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1192       UseCLMUL = true;
1193     }
1194   } else if (UseCLMUL) {
1195     if (!FLAG_IS_DEFAULT(UseCLMUL))
1196       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1197     FLAG_SET_DEFAULT(UseCLMUL, false);
1198   }
1199 
1200   if (UseCLMUL && (UseSSE > 2)) {
1201     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1202       UseCRC32Intrinsics = true;
1203     }
1204   } else if (UseCRC32Intrinsics) {
1205     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1206       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1207     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1208   }
1209 
1210   if (supports_avx2()) {
1211     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1212       UseAdler32Intrinsics = true;
1213     }
1214   } else if (UseAdler32Intrinsics) {
1215     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1216       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1217     }
1218     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1219   }
1220 
1221   if (supports_sse4_2() && supports_clmul()) {
1222     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1223       UseCRC32CIntrinsics = true;
1224     }
1225   } else if (UseCRC32CIntrinsics) {
1226     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1227       warning("CRC32C intrinsics are not available on this CPU");
1228     }
1229     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1230   }
1231 
1232   // GHASH/GCM intrinsics
1233   if (UseCLMUL && (UseSSE > 2)) {
1234     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1235       UseGHASHIntrinsics = true;
1236     }
1237   } else if (UseGHASHIntrinsics) {
1238     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1239       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1240     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1241   }
1242 
1243   // ChaCha20 Intrinsics
1244   // As long as the system supports AVX as a baseline we can do a
1245   // SIMD-enabled block function.  StubGenerator makes the determination
1246   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1247   // version.
1248   if (UseAVX >= 1) {
1249       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1250           UseChaCha20Intrinsics = true;
1251       }
1252   } else if (UseChaCha20Intrinsics) {
1253       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1254           warning("ChaCha20 intrinsic requires AVX instructions");
1255       }
1256       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1257   }
1258 
1259   // Kyber Intrinsics
1260   // Currently we only have them for AVX512
1261 #ifdef _LP64
1262   if (supports_evex() && supports_avx512bw()) {
1263       if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1264           UseKyberIntrinsics = true;
1265       }
1266   } else
1267 #endif
1268   if (UseKyberIntrinsics) {
1269      warning("Intrinsics for ML-KEM are not available on this CPU.");
1270      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1271   }
1272 
1273   // Dilithium Intrinsics
1274   // Currently we only have them for AVX512
1275   if (supports_evex() && supports_avx512bw()) {
1276       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1277           UseDilithiumIntrinsics = true;
1278       }
1279   } else if (UseDilithiumIntrinsics) {
1280       warning("Intrinsics for ML-DSA are not available on this CPU.");
1281       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1282   }
1283 
1284   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1285   if (UseAVX >= 2) {
1286     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1287       UseBASE64Intrinsics = true;
1288     }
1289   } else if (UseBASE64Intrinsics) {
1290      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1291       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1292     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1293   }
1294 
1295   if (supports_fma()) {
1296     if (FLAG_IS_DEFAULT(UseFMA)) {
1297       UseFMA = true;
1298     }
1299   } else if (UseFMA) {
1300     warning("FMA instructions are not available on this CPU");
1301     FLAG_SET_DEFAULT(UseFMA, false);
1302   }
1303 
1304   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1305     UseMD5Intrinsics = true;
1306   }
1307 
1308   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1309     if (FLAG_IS_DEFAULT(UseSHA)) {
1310       UseSHA = true;
1311     }
1312   } else if (UseSHA) {
1313     warning("SHA instructions are not available on this CPU");
1314     FLAG_SET_DEFAULT(UseSHA, false);
1315   }
1316 
1317   if (supports_sha() && supports_sse4_1() && UseSHA) {
1318     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1319       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1320     }
1321   } else if (UseSHA1Intrinsics) {
1322     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1323     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1324   }
1325 
1326   if (supports_sse4_1() && UseSHA) {
1327     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1328       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1329     }
1330   } else if (UseSHA256Intrinsics) {
1331     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1332     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1333   }
1334 
1335   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1336     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1337       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1338     }
1339   } else if (UseSHA512Intrinsics) {
1340     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1341     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1342   }
1343 
1344   if (supports_evex() && supports_avx512bw()) {
1345       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1346           UseSHA3Intrinsics = true;
1347       }
1348   } else if (UseSHA3Intrinsics) {
1349       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1350       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1351   }
1352 
1353   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1354     FLAG_SET_DEFAULT(UseSHA, false);
1355   }
1356 
1357 #if COMPILER2_OR_JVMCI
1358   int max_vector_size = 0;
1359   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1360     // 16 byte vectors (in XMM) are supported with SSE2+
1361     max_vector_size = 16;
1362   } else if (UseAVX == 1 || UseAVX == 2) {
1363     // 32 bytes vectors (in YMM) are only supported with AVX+
1364     max_vector_size = 32;
1365   } else if (UseAVX > 2) {
1366     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1367     max_vector_size = 64;
1368   }
1369 
1370   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1371 
1372   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1373     if (MaxVectorSize < min_vector_size) {
1374       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1375       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1376     }
1377     if (MaxVectorSize > max_vector_size) {
1378       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1379       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1380     }
1381     if (!is_power_of_2(MaxVectorSize)) {
1382       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1383       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1384     }
1385   } else {
1386     // If default, use highest supported configuration
1387     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1388   }
1389 
1390 #if defined(COMPILER2) && defined(ASSERT)
1391   if (MaxVectorSize > 0) {
1392     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1393       tty->print_cr("State of YMM registers after signal handle:");
1394       int nreg = 4;
1395       const char* ymm_name[4] = {"0", "7", "8", "15"};
1396       for (int i = 0; i < nreg; i++) {
1397         tty->print("YMM%s:", ymm_name[i]);
1398         for (int j = 7; j >=0; j--) {
1399           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1400         }
1401         tty->cr();
1402       }
1403     }
1404   }
1405 #endif // COMPILER2 && ASSERT
1406 
1407   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1408     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1409       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1410     }
1411   } else if (UsePoly1305Intrinsics) {
1412     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1413     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1414   }
1415 
1416   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1417     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1418       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1419     }
1420   } else if (UseIntPolyIntrinsics) {
1421     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1422     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1423   }
1424 
1425   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1426     UseMultiplyToLenIntrinsic = true;
1427   }
1428   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1429     UseSquareToLenIntrinsic = true;
1430   }
1431   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1432     UseMulAddIntrinsic = true;
1433   }
1434   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1435     UseMontgomeryMultiplyIntrinsic = true;
1436   }
1437   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1438     UseMontgomerySquareIntrinsic = true;
1439   }
1440 #endif // COMPILER2_OR_JVMCI
1441 
1442   // On new cpus instructions which update whole XMM register should be used
1443   // to prevent partial register stall due to dependencies on high half.
1444   //
1445   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1446   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1447   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1448   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1449 
1450 
1451   if (is_zx()) { // ZX cpus specific settings
1452     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1453       UseStoreImmI16 = false; // don't use it on ZX cpus
1454     }
1455     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1456       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1457         // Use it on all ZX cpus
1458         UseAddressNop = true;
1459       }
1460     }
1461     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1462       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1463     }
1464     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1465       if (supports_sse3()) {
1466         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1467       } else {
1468         UseXmmRegToRegMoveAll = false;
1469       }
1470     }
1471     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1472 #ifdef COMPILER2
1473       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1474         // For new ZX cpus do the next optimization:
1475         // don't align the beginning of a loop if there are enough instructions
1476         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1477         // in current fetch line (OptoLoopAlignment) or the padding
1478         // is big (> MaxLoopPad).
1479         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1480         // generated NOP instructions. 11 is the largest size of one
1481         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1482         MaxLoopPad = 11;
1483       }
1484 #endif // COMPILER2
1485       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1486         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1487       }
1488       if (supports_sse4_2()) { // new ZX cpus
1489         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1490           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1491         }
1492       }
1493     }
1494 
1495     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1496       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1497     }
1498   }
1499 
1500   if (is_amd_family()) { // AMD cpus specific settings
1501     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1502       // Use it on new AMD cpus starting from Opteron.
1503       UseAddressNop = true;
1504     }
1505     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1506       // Use it on new AMD cpus starting from Opteron.
1507       UseNewLongLShift = true;
1508     }
1509     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1510       if (supports_sse4a()) {
1511         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1512       } else {
1513         UseXmmLoadAndClearUpper = false;
1514       }
1515     }
1516     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1517       if (supports_sse4a()) {
1518         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1519       } else {
1520         UseXmmRegToRegMoveAll = false;
1521       }
1522     }
1523     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1524       if (supports_sse4a()) {
1525         UseXmmI2F = true;
1526       } else {
1527         UseXmmI2F = false;
1528       }
1529     }
1530     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1531       if (supports_sse4a()) {
1532         UseXmmI2D = true;
1533       } else {
1534         UseXmmI2D = false;
1535       }
1536     }
1537 
1538     // some defaults for AMD family 15h
1539     if (cpu_family() == 0x15) {
1540       // On family 15h processors default is no sw prefetch
1541       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1542         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1543       }
1544       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1545       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1546         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1547       }
1548       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1549       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1550         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1551       }
1552       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1553         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1554       }
1555     }
1556 
1557 #ifdef COMPILER2
1558     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1559       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1560       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1561     }
1562 #endif // COMPILER2
1563 
1564     // Some defaults for AMD family >= 17h && Hygon family 18h
1565     if (cpu_family() >= 0x17) {
1566       // On family >=17h processors use XMM and UnalignedLoadStores
1567       // for Array Copy
1568       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1569         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1570       }
1571       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1572         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1573       }
1574 #ifdef COMPILER2
1575       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1576         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1577       }
1578 #endif
1579     }
1580   }
1581 
1582   if (is_intel()) { // Intel cpus specific settings
1583     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1584       UseStoreImmI16 = false; // don't use it on Intel cpus
1585     }
1586     if (is_intel_server_family() || cpu_family() == 15) {
1587       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1588         // Use it on all Intel cpus starting from PentiumPro
1589         UseAddressNop = true;
1590       }
1591     }
1592     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1593       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1594     }
1595     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1596       if (supports_sse3()) {
1597         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1598       } else {
1599         UseXmmRegToRegMoveAll = false;
1600       }
1601     }
1602     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1603 #ifdef COMPILER2
1604       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1605         // For new Intel cpus do the next optimization:
1606         // don't align the beginning of a loop if there are enough instructions
1607         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1608         // in current fetch line (OptoLoopAlignment) or the padding
1609         // is big (> MaxLoopPad).
1610         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1611         // generated NOP instructions. 11 is the largest size of one
1612         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1613         MaxLoopPad = 11;
1614       }
1615 #endif // COMPILER2
1616 
1617       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1618         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1619       }
1620       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1621         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1622           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1623         }
1624       }
1625     }
1626     if (is_atom_family() || is_knights_family()) {
1627 #ifdef COMPILER2
1628       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1629         OptoScheduling = true;
1630       }
1631 #endif
1632       if (supports_sse4_2()) { // Silvermont
1633         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1634           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1635         }
1636       }
1637       if (FLAG_IS_DEFAULT(UseIncDec)) {
1638         FLAG_SET_DEFAULT(UseIncDec, false);
1639       }
1640     }
1641     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1642       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1643     }
1644 #ifdef COMPILER2
1645     if (UseAVX > 2) {
1646       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1647           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1648            ArrayOperationPartialInlineSize != 0 &&
1649            ArrayOperationPartialInlineSize != 16 &&
1650            ArrayOperationPartialInlineSize != 32 &&
1651            ArrayOperationPartialInlineSize != 64)) {
1652         int inline_size = 0;
1653         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1654           inline_size = 64;
1655         } else if (MaxVectorSize >= 32) {
1656           inline_size = 32;
1657         } else if (MaxVectorSize >= 16) {
1658           inline_size = 16;
1659         }
1660         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1661           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1662         }
1663         ArrayOperationPartialInlineSize = inline_size;
1664       }
1665 
1666       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1667         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1668         if (ArrayOperationPartialInlineSize) {
1669           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1670         } else {
1671           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1672         }
1673       }
1674     }
1675 #endif
1676   }
1677 
1678 #ifdef COMPILER2
1679   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1680     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1681       OptimizeFill = false;
1682     }
1683   }
1684 #endif
1685   if (supports_sse4_2()) {
1686     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1687       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1688     }
1689   } else {
1690     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1691       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1692     }
1693     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1694   }
1695   if (UseSSE42Intrinsics) {
1696     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1697       UseVectorizedMismatchIntrinsic = true;
1698     }
1699   } else if (UseVectorizedMismatchIntrinsic) {
1700     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1701       warning("vectorizedMismatch intrinsics are not available on this CPU");
1702     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1703   }
1704   if (UseAVX >= 2) {
1705     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1706   } else if (UseVectorizedHashCodeIntrinsic) {
1707     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1708       warning("vectorizedHashCode intrinsics are not available on this CPU");
1709     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1710   }
1711 
1712   // Use count leading zeros count instruction if available.
1713   if (supports_lzcnt()) {
1714     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1715       UseCountLeadingZerosInstruction = true;
1716     }
1717    } else if (UseCountLeadingZerosInstruction) {
1718     warning("lzcnt instruction is not available on this CPU");
1719     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1720   }
1721 
1722   // Use count trailing zeros instruction if available
1723   if (supports_bmi1()) {
1724     // tzcnt does not require VEX prefix
1725     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1726       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1727         // Don't use tzcnt if BMI1 is switched off on command line.
1728         UseCountTrailingZerosInstruction = false;
1729       } else {
1730         UseCountTrailingZerosInstruction = true;
1731       }
1732     }
1733   } else if (UseCountTrailingZerosInstruction) {
1734     warning("tzcnt instruction is not available on this CPU");
1735     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1736   }
1737 
1738   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1739   // VEX prefix is generated only when AVX > 0.
1740   if (supports_bmi1() && supports_avx()) {
1741     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1742       UseBMI1Instructions = true;
1743     }
1744   } else if (UseBMI1Instructions) {
1745     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1746     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1747   }
1748 
1749   if (supports_bmi2() && supports_avx()) {
1750     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1751       UseBMI2Instructions = true;
1752     }
1753   } else if (UseBMI2Instructions) {
1754     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1755     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1756   }
1757 
1758   // Use population count instruction if available.
1759   if (supports_popcnt()) {
1760     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1761       UsePopCountInstruction = true;
1762     }
1763   } else if (UsePopCountInstruction) {
1764     warning("POPCNT instruction is not available on this CPU");
1765     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1766   }
1767 
1768   // Use fast-string operations if available.
1769   if (supports_erms()) {
1770     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1771       UseFastStosb = true;
1772     }
1773   } else if (UseFastStosb) {
1774     warning("fast-string operations are not available on this CPU");
1775     FLAG_SET_DEFAULT(UseFastStosb, false);
1776   }
1777 
1778   // For AMD Processors use XMM/YMM MOVDQU instructions
1779   // for Object Initialization as default
1780   if (is_amd() && cpu_family() >= 0x19) {
1781     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1782       UseFastStosb = false;
1783     }
1784   }
1785 
1786 #ifdef COMPILER2
1787   if (is_intel() && MaxVectorSize > 16) {
1788     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1789       UseFastStosb = false;
1790     }
1791   }
1792 #endif
1793 
1794   // Use XMM/YMM MOVDQU instruction for Object Initialization
1795   if (!UseFastStosb && UseUnalignedLoadStores) {
1796     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1797       UseXMMForObjInit = true;
1798     }
1799   } else if (UseXMMForObjInit) {
1800     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1801     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1802   }
1803 
1804 #ifdef COMPILER2
1805   if (FLAG_IS_DEFAULT(AlignVector)) {
1806     // Modern processors allow misaligned memory operations for vectors.
1807     AlignVector = !UseUnalignedLoadStores;
1808   }
1809 #endif // COMPILER2
1810 
1811   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1812     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1813       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1814     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1815       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1816     }
1817   }
1818 
1819   // Allocation prefetch settings
1820   int cache_line_size = checked_cast<int>(prefetch_data_size());
1821   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1822       (cache_line_size > AllocatePrefetchStepSize)) {
1823     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1824   }
1825 
1826   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1827     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1828     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1829       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1830     }
1831     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1832   }
1833 
1834   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1835     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1836     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1837   }
1838 
1839   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1840     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1841         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1842       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1843     }
1844 #ifdef COMPILER2
1845     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1846       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1847     }
1848 #endif
1849   }
1850 
1851   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1852 #ifdef COMPILER2
1853     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1854       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1855     }
1856 #endif
1857   }
1858 
1859   // Prefetch settings
1860 
1861   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1862   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1863   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1864   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1865 
1866   // gc copy/scan is disabled if prefetchw isn't supported, because
1867   // Prefetch::write emits an inlined prefetchw on Linux.
1868   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1869   // The used prefetcht0 instruction works for both amd64 and em64t.
1870 
1871   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1872     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1873   }
1874   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1875     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1876   }
1877 
1878   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1879      (cache_line_size > ContendedPaddingWidth))
1880      ContendedPaddingWidth = cache_line_size;
1881 
1882   // This machine allows unaligned memory accesses
1883   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1884     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1885   }
1886 
1887 #ifndef PRODUCT
1888   if (log_is_enabled(Info, os, cpu)) {
1889     LogStream ls(Log(os, cpu)::info());
1890     outputStream* log = &ls;
1891     log->print_cr("Logical CPUs per core: %u",
1892                   logical_processors_per_package());
1893     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1894     log->print("UseSSE=%d", UseSSE);
1895     if (UseAVX > 0) {
1896       log->print("  UseAVX=%d", UseAVX);
1897     }
1898     if (UseAES) {
1899       log->print("  UseAES=1");
1900     }
1901 #ifdef COMPILER2
1902     if (MaxVectorSize > 0) {
1903       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1904     }
1905 #endif
1906     log->cr();
1907     log->print("Allocation");
1908     if (AllocatePrefetchStyle <= 0) {
1909       log->print_cr(": no prefetching");
1910     } else {
1911       log->print(" prefetching: ");
1912       if (AllocatePrefetchInstr == 0) {
1913         log->print("PREFETCHNTA");
1914       } else if (AllocatePrefetchInstr == 1) {
1915         log->print("PREFETCHT0");
1916       } else if (AllocatePrefetchInstr == 2) {
1917         log->print("PREFETCHT2");
1918       } else if (AllocatePrefetchInstr == 3) {
1919         log->print("PREFETCHW");
1920       }
1921       if (AllocatePrefetchLines > 1) {
1922         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1923       } else {
1924         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1925       }
1926     }
1927 
1928     if (PrefetchCopyIntervalInBytes > 0) {
1929       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1930     }
1931     if (PrefetchScanIntervalInBytes > 0) {
1932       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1933     }
1934     if (ContendedPaddingWidth > 0) {
1935       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1936     }
1937   }
1938 #endif // !PRODUCT
1939   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1940       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1941   }
1942   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1943       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1944   }
1945 }
1946 
1947 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1948   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1949   if (vrt == XenHVM) {
1950     st->print_cr("Xen hardware-assisted virtualization detected");
1951   } else if (vrt == KVM) {
1952     st->print_cr("KVM virtualization detected");
1953   } else if (vrt == VMWare) {
1954     st->print_cr("VMWare virtualization detected");
1955     VirtualizationSupport::print_virtualization_info(st);
1956   } else if (vrt == HyperV) {
1957     st->print_cr("Hyper-V virtualization detected");
1958   } else if (vrt == HyperVRole) {
1959     st->print_cr("Hyper-V role detected");
1960   }
1961 }
1962 
// Returns true if the CPU is an Intel Core-family part whose (model, stepping)
// pair appears in Intel's published list of processors affected by the
// Jump-Conditional-Code (JCC) erratum. Reads the static _model/_stepping
// values populated by the CPUID stub.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    // No stepping listed, so conservatively treat every stepping as affected.
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2030 
2031 // On Xen, the cpuid instruction returns
2032 //  eax / registers[0]: Version of Xen
2033 //  ebx / registers[1]: chars 'XenV'
2034 //  ecx / registers[2]: chars 'MMXe'
2035 //  edx / registers[3]: chars 'nVMM'
2036 //
2037 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2038 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2039 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2040 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2041 //
2042 // more information :
2043 // https://kb.vmware.com/s/article/1009458
2044 //
2045 void VM_Version::check_virtualizations() {
2046   uint32_t registers[4] = {0};
2047   char signature[13] = {0};
2048 
2049   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2050   // from 0x40000000 until 0x40010000.
2051   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2052   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2053     detect_virt_stub(leaf, registers);
2054     memcpy(signature, &registers[1], 12);
2055 
2056     if (strncmp("VMwareVMware", signature, 12) == 0) {
2057       Abstract_VM_Version::_detected_virtualization = VMWare;
2058       // check for extended metrics from guestlib
2059       VirtualizationSupport::initialize();
2060     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2061       Abstract_VM_Version::_detected_virtualization = HyperV;
2062 #ifdef _WINDOWS
2063       // CPUID leaf 0x40000007 is available to the root partition only.
2064       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2065       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2066       detect_virt_stub(0x40000007, registers);
2067       if ((registers[0] != 0x0) ||
2068           (registers[1] != 0x0) ||
2069           (registers[2] != 0x0) ||
2070           (registers[3] != 0x0)) {
2071         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2072       }
2073 #endif
2074     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2075       Abstract_VM_Version::_detected_virtualization = KVM;
2076     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2077       Abstract_VM_Version::_detected_virtualization = XenHVM;
2078     }
2079   }
2080 }
2081 
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options:
// true only when neither UseAVX nor MaxVectorSize was set on the command
// line, AVX-512 is in use, and the CPU identifies as Cascade Lake.
bool VM_Version::is_default_intel_cascade_lake() {
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  return (UseAVX > 2) && is_intel_cascade_lake();
}
#endif
2091 
2092 bool VM_Version::is_intel_cascade_lake() {
2093   return is_intel_skylake() && _stepping >= 5;
2094 }
2095 
2096 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2097 // for implementing the array copy and clear operations.
2098 // The Intel platforms that supports the serialize instruction
2099 // has improved implementation of 64-byte load/stores and so the default
2100 // threshold is set to 0 for these platforms.
2101 int VM_Version::avx3_threshold() {
2102   return (is_intel_server_family() &&
2103           supports_serialize() &&
2104           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2105 }
2106 
// Thin wrapper that invokes the generated APX test-state clearing stub
// (installed in initialize() below).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2110 
// Set once initialize() has completed; signals that CPU feature data is valid.
static bool _vm_version_initialized = false;

// Generate the CPUID helper stubs, query processor features, and record any
// detected hypervisor. Called once during VM startup.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Install the three generated entry points: CPUID feature query,
  // virtualization-signature probe, and APX test-state reset.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  // Run the CPUID stub and derive all feature flags / ergonomic defaults.
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2139 
// Legacy CPUID family-field values (CPUID leaf 1, EAX bits 8-11).
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Extended-feature bits in EDX of CPUID leaf 0x80000001.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// Feature bits in EDX of CPUID leaf 1, one flag per bit position.
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2185 
// Blob holding the generated stub that fetches the CPU brand string via CPUID.
static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  // Stub entry point: fills the caller-supplied buffer with brand-string data.
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

// 12 vendor-id characters plus a trailing NUL.
const size_t VENDOR_LENGTH = 13;
// Extended brand string: 3 CPUID leaves x 4 registers x 4 bytes, plus NUL.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2208 
// Human-readable names for Intel family ids 0x0-0xF (empty string when the
// family id has no common name); array length matches
// ExtendedFamilyIdLength_INTEL.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"      // 0xF
};
2227 
// Human-readable names for AMD family ids 0x0-0x17 (empty string when the
// family id has no common name); array length matches
// ExtendedFamilyIdLength_AMD.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",               // 0x04
  "K5/K6",              // 0x05
  "Athlon/AthlonXP",    // 0x06
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",   // 0x0F
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"                 // 0x17
};
2254 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2255 // September 2013, Vol 3C Table 35-1
// Model names for family 6 (Pentium Pro lineage), indexed by CPUID model
// number; empty string when the model has no listed name. The array is
// nullptr-terminated so lookup code can detect its end.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2330 
/* Brand ID is for back compatibility.
 * Newer CPUs use the extended brand string instead.
 * Indexed by the CPUID brand-id value; nullptr-terminated. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2345 
2346 
// Descriptions of the 32 feature bits in EDX of CPUID leaf 1, one entry per
// bit position (empty string for reserved bits). Parallels FeatureEdxFlag.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2381 
// Descriptions of the feature bits in EDX of CPUID leaf 0x80000001, one
// entry per bit position (empty string for bits not described here).
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",        // bit 11
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",   // bit 20
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",                // bit 27, see RDTSCP_FLAG
  "",
  "Intel 64 Architecture", // bit 29, see INTEL64_FLAG
  "",
  ""
};
2416 
// Descriptions of the 32 feature bits in ECX of CPUID leaf 1, one entry per
// bit position (empty string for reserved bits).
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2451 
// Printable names for the ext_cpuid1_ecx feature bits, indexed by bit
// position (0..31). Only the low bits carry names; the rest are empty and
// skipped by cpu_write_support_string().
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",            // bit 0
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",        // bit 5
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",  // bit 6
  "Misaligned SSE mode",                      // bit 7
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2486 
2487 void VM_Version::initialize_tsc(void) {
2488   ResourceMark rm;
2489 
2490   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2491   if (cpuid_brand_string_stub_blob == nullptr) {
2492     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2493   }
2494   CodeBuffer c(cpuid_brand_string_stub_blob);
2495   VM_Version_StubGenerator g(&c);
2496   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2497                                    g.generate_getCPUIDBrandString());
2498 }
2499 
2500 const char* VM_Version::cpu_model_description(void) {
2501   uint32_t cpu_family = extended_cpu_family();
2502   uint32_t cpu_model = extended_cpu_model();
2503   const char* model = nullptr;
2504 
2505   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2506     for (uint32_t i = 0; i <= cpu_model; i++) {
2507       model = _model_id_pentium_pro[i];
2508       if (model == nullptr) {
2509         break;
2510       }
2511     }
2512   }
2513   return model;
2514 }
2515 
2516 const char* VM_Version::cpu_brand_string(void) {
2517   if (_cpu_brand_string == nullptr) {
2518     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2519     if (nullptr == _cpu_brand_string) {
2520       return nullptr;
2521     }
2522     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2523     if (ret_val != OS_OK) {
2524       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2525       _cpu_brand_string = nullptr;
2526     }
2527   }
2528   return _cpu_brand_string;
2529 }
2530 
2531 const char* VM_Version::cpu_brand(void) {
2532   const char*  brand  = nullptr;
2533 
2534   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2535     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2536     brand = _brand_id[0];
2537     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2538       brand = _brand_id[i];
2539     }
2540   }
2541   return brand;
2542 }
2543 
2544 bool VM_Version::cpu_is_em64t(void) {
2545   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2546 }
2547 
2548 bool VM_Version::is_netburst(void) {
2549   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2550 }
2551 
2552 bool VM_Version::supports_tscinv_ext(void) {
2553   if (!supports_tscinv_bit()) {
2554     return false;
2555   }
2556 
2557   if (is_intel()) {
2558     return true;
2559   }
2560 
2561   if (is_amd()) {
2562     return !is_amd_Barcelona();
2563   }
2564 
2565   if (is_hygon()) {
2566     return true;
2567   }
2568 
2569   return false;
2570 }
2571 
2572 void VM_Version::resolve_cpu_information_details(void) {
2573 
2574   // in future we want to base this information on proper cpu
2575   // and cache topology enumeration such as:
2576   // Intel 64 Architecture Processor Topology Enumeration
2577   // which supports system cpu and cache topology enumeration
2578   // either using 2xAPICIDs or initial APICIDs
2579 
2580   // currently only rough cpu information estimates
2581   // which will not necessarily reflect the exact configuration of the system
2582 
2583   // this is the number of logical hardware threads
2584   // visible to the operating system
2585   _no_of_threads = os::processor_count();
2586 
2587   // find out number of threads per cpu package
2588   int threads_per_package = threads_per_core() * cores_per_cpu();
2589 
2590   // use amount of threads visible to the process in order to guess number of sockets
2591   _no_of_sockets = _no_of_threads / threads_per_package;
2592 
2593   // process might only see a subset of the total number of threads
2594   // from a single processor package. Virtualization/resource management for example.
2595   // If so then just write a hard 1 as num of pkgs.
2596   if (0 == _no_of_sockets) {
2597     _no_of_sockets = 1;
2598   }
2599 
2600   // estimate the number of cores
2601   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2602 }
2603 
2604 
2605 const char* VM_Version::cpu_family_description(void) {
2606   int cpu_family_id = extended_cpu_family();
2607   if (is_amd()) {
2608     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2609       return _family_id_amd[cpu_family_id];
2610     }
2611   }
2612   if (is_intel()) {
2613     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2614       return cpu_model_description();
2615     }
2616     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2617       return _family_id_intel[cpu_family_id];
2618     }
2619   }
2620   if (is_hygon()) {
2621     return "Dhyana";
2622   }
2623   return "Unknown x86";
2624 }
2625 
2626 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2627   assert(buf != nullptr, "buffer is null!");
2628   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2629 
2630   const char* cpu_type = nullptr;
2631   const char* x64 = nullptr;
2632 
2633   if (is_intel()) {
2634     cpu_type = "Intel";
2635     x64 = cpu_is_em64t() ? " Intel64" : "";
2636   } else if (is_amd()) {
2637     cpu_type = "AMD";
2638     x64 = cpu_is_em64t() ? " AMD64" : "";
2639   } else if (is_hygon()) {
2640     cpu_type = "Hygon";
2641     x64 = cpu_is_em64t() ? " AMD64" : "";
2642   } else {
2643     cpu_type = "Unknown x86";
2644     x64 = cpu_is_em64t() ? " x86_64" : "";
2645   }
2646 
2647   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2648     cpu_type,
2649     cpu_family_description(),
2650     supports_ht() ? " (HT)" : "",
2651     supports_sse3() ? " SSE3" : "",
2652     supports_ssse3() ? " SSSE3" : "",
2653     supports_sse4_1() ? " SSE4.1" : "",
2654     supports_sse4_2() ? " SSE4.2" : "",
2655     supports_sse4a() ? " SSE4A" : "",
2656     is_netburst() ? " Netburst" : "",
2657     is_intel_family_core() ? " Core" : "",
2658     x64);
2659 
2660   return OS_OK;
2661 }
2662 
2663 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2664   assert(buf != nullptr, "buffer is null!");
2665   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2666   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2667 
2668   // invoke newly generated asm code to fetch CPU Brand String
2669   getCPUIDBrandString_stub(&_cpuid_info);
2670 
2671   // fetch results into buffer
2672   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2673   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2674   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2675   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2676   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2677   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2678   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2679   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2680   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2681   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2682   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2683   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2684 
2685   return OS_OK;
2686 }
2687 
// Appends a comma-separated list of supported CPU feature names to 'buf',
// scanning bits 0..29 of the four CPUID feature registers against the name
// tables above. Returns the number of characters written, or buf_len - 1 if
// a formatting call failed part-way.
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;     // current single-bit mask being tested
  unsigned int fi = 0;       // bit index into the name tables
  size_t       written = 0;  // characters emitted so far
  const char*  prefix = "";  // "" before the first name, ", " afterwards

// Appends 'prefix' + 'string' at the current write position; on a
// formatting failure it returns buf_len - 1 from the enclosing function.
// After the first successful write the prefix becomes ", ".
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // std_cpuid1_edx features; HTT and SEP get extra validity checks before
  // being reported. Bits with empty names are skipped.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // std_cpuid1_ecx features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // ext_cpuid1_ecx features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // ext_cpuid1_edx features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Invariant TSC is tracked via its own accessor rather than a table bit.
  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}
2744 
2745 /**
2746  * Write a detailed description of the cpu to a given buffer, including
2747  * feature set.
2748  */
2749 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2750   assert(buf != nullptr, "buffer is null!");
2751   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2752 
2753   static const char* unknown = "<unknown>";
2754   char               vendor_id[VENDOR_LENGTH];
2755   const char*        family = nullptr;
2756   const char*        model = nullptr;
2757   const char*        brand = nullptr;
2758   int                outputLen = 0;
2759 
2760   family = cpu_family_description();
2761   if (family == nullptr) {
2762     family = unknown;
2763   }
2764 
2765   model = cpu_model_description();
2766   if (model == nullptr) {
2767     model = unknown;
2768   }
2769 
2770   brand = cpu_brand_string();
2771 
2772   if (brand == nullptr) {
2773     brand = cpu_brand();
2774     if (brand == nullptr) {
2775       brand = unknown;
2776     }
2777   }
2778 
2779   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2780   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2781   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2782   vendor_id[VENDOR_LENGTH-1] = '\0';
2783 
2784   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2785     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2786     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2787     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2788     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2789     "Supports: ",
2790     brand,
2791     vendor_id,
2792     family,
2793     extended_cpu_family(),
2794     model,
2795     extended_cpu_model(),
2796     cpu_stepping(),
2797     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2798     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2799     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2800     _cpuid_info.std_cpuid1_eax.value,
2801     _cpuid_info.std_cpuid1_ebx.value,
2802     _cpuid_info.std_cpuid1_ecx.value,
2803     _cpuid_info.std_cpuid1_edx.value,
2804     _cpuid_info.ext_cpuid1_eax,
2805     _cpuid_info.ext_cpuid1_ebx,
2806     _cpuid_info.ext_cpuid1_ecx,
2807     _cpuid_info.ext_cpuid1_edx);
2808 
2809   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2810     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2811     return OS_ERR;
2812   }
2813 
2814   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2815 
2816   return OS_OK;
2817 }
2818 
2819 
// Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  // Must run after the main VM_Version setup, and only once.
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  // Estimate socket/core/thread counts before producing descriptions.
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2831 
2832 /**
2833  *  For information about extracting the frequency from the cpu brand string, please see:
2834  *
2835  *    Intel Processor Identification and the CPUID Instruction
2836  *    Application Note 485
2837  *    May 2012
2838  *
2839  * The return value is the frequency in Hz.
2840  */
2841 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2842   const char* const brand_string = cpu_brand_string();
2843   if (brand_string == nullptr) {
2844     return 0;
2845   }
2846   const int64_t MEGA = 1000000;
2847   int64_t multiplier = 0;
2848   int64_t frequency = 0;
2849   uint8_t idx = 0;
2850   // The brand string buffer is at most 48 bytes.
2851   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2852   for (; idx < 48-2; ++idx) {
2853     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2854     // Search brand string for "yHz" where y is M, G, or T.
2855     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2856       if (brand_string[idx] == 'M') {
2857         multiplier = MEGA;
2858       } else if (brand_string[idx] == 'G') {
2859         multiplier = MEGA * 1000;
2860       } else if (brand_string[idx] == 'T') {
2861         multiplier = MEGA * MEGA;
2862       }
2863       break;
2864     }
2865   }
2866   if (multiplier > 0) {
2867     // Compute frequency (in Hz) from brand string.
2868     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2869       frequency =  (brand_string[idx-4] - '0') * multiplier;
2870       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2871       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2872     } else { // format is "xxxx"
2873       frequency =  (brand_string[idx-4] - '0') * 1000;
2874       frequency += (brand_string[idx-3] - '0') * 100;
2875       frequency += (brand_string[idx-2] - '0') * 10;
2876       frequency += (brand_string[idx-1] - '0');
2877       frequency *= multiplier;
2878     }
2879   }
2880   return frequency;
2881 }
2882 
2883 
2884 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2885   if (_max_qualified_cpu_frequency == 0) {
2886     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2887   }
2888   return _max_qualified_cpu_frequency;
2889 }
2890 
2891 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2892   VM_Features vm_features;
2893   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2894     vm_features.set_feature(CPU_CX8);
2895   if (std_cpuid1_edx.bits.cmov != 0)
2896     vm_features.set_feature(CPU_CMOV);
2897   if (std_cpuid1_edx.bits.clflush != 0)
2898     vm_features.set_feature(CPU_FLUSH);
2899   // clflush should always be available on x86_64
2900   // if not we are in real trouble because we rely on it
2901   // to flush the code cache.
2902   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2903   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2904       ext_cpuid1_edx.bits.fxsr != 0))
2905     vm_features.set_feature(CPU_FXSR);
2906   // HT flag is set for multi-core processors also.
2907   if (threads_per_core() > 1)
2908     vm_features.set_feature(CPU_HT);
2909   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2910       ext_cpuid1_edx.bits.mmx != 0))
2911     vm_features.set_feature(CPU_MMX);
2912   if (std_cpuid1_edx.bits.sse != 0)
2913     vm_features.set_feature(CPU_SSE);
2914   if (std_cpuid1_edx.bits.sse2 != 0)
2915     vm_features.set_feature(CPU_SSE2);
2916   if (std_cpuid1_ecx.bits.sse3 != 0)
2917     vm_features.set_feature(CPU_SSE3);
2918   if (std_cpuid1_ecx.bits.ssse3 != 0)
2919     vm_features.set_feature(CPU_SSSE3);
2920   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2921     vm_features.set_feature(CPU_SSE4_1);
2922   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2923     vm_features.set_feature(CPU_SSE4_2);
2924   if (std_cpuid1_ecx.bits.popcnt != 0)
2925     vm_features.set_feature(CPU_POPCNT);
2926   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2927       xem_xcr0_eax.bits.apx_f != 0 &&
2928       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2929     vm_features.set_feature(CPU_APX_F);
2930   }
2931   if (std_cpuid1_ecx.bits.avx != 0 &&
2932       std_cpuid1_ecx.bits.osxsave != 0 &&
2933       xem_xcr0_eax.bits.sse != 0 &&
2934       xem_xcr0_eax.bits.ymm != 0) {
2935     vm_features.set_feature(CPU_AVX);
2936     vm_features.set_feature(CPU_VZEROUPPER);
2937     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2938       vm_features.set_feature(CPU_SHA512);
2939     if (std_cpuid1_ecx.bits.f16c != 0)
2940       vm_features.set_feature(CPU_F16C);
2941     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2942       vm_features.set_feature(CPU_AVX2);
2943       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2944         vm_features.set_feature(CPU_AVX_IFMA);
2945     }
2946     if (sef_cpuid7_ecx.bits.gfni != 0)
2947         vm_features.set_feature(CPU_GFNI);
2948     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2949         xem_xcr0_eax.bits.opmask != 0 &&
2950         xem_xcr0_eax.bits.zmm512 != 0 &&
2951         xem_xcr0_eax.bits.zmm32 != 0) {
2952       vm_features.set_feature(CPU_AVX512F);
2953       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2954         vm_features.set_feature(CPU_AVX512CD);
2955       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2956         vm_features.set_feature(CPU_AVX512DQ);
2957       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2958         vm_features.set_feature(CPU_AVX512_IFMA);
2959       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2960         vm_features.set_feature(CPU_AVX512PF);
2961       if (sef_cpuid7_ebx.bits.avx512er != 0)
2962         vm_features.set_feature(CPU_AVX512ER);
2963       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2964         vm_features.set_feature(CPU_AVX512BW);
2965       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2966         vm_features.set_feature(CPU_AVX512VL);
2967       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2968         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2969       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2970         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2971       if (sef_cpuid7_ecx.bits.vaes != 0)
2972         vm_features.set_feature(CPU_AVX512_VAES);
2973       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2974         vm_features.set_feature(CPU_AVX512_VNNI);
2975       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2976         vm_features.set_feature(CPU_AVX512_BITALG);
2977       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2978         vm_features.set_feature(CPU_AVX512_VBMI);
2979       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2980         vm_features.set_feature(CPU_AVX512_VBMI2);
2981     }
2982     if (is_intel()) {
2983       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2984           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2985           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2986           xem_xcr0_eax.bits.opmask != 0 &&
2987           xem_xcr0_eax.bits.zmm512 != 0 &&
2988           xem_xcr0_eax.bits.zmm32 != 0) {
2989         vm_features.set_feature(CPU_AVX10_1);
2990         vm_features.set_feature(CPU_AVX512F);
2991         vm_features.set_feature(CPU_AVX512CD);
2992         vm_features.set_feature(CPU_AVX512DQ);
2993         vm_features.set_feature(CPU_AVX512PF);
2994         vm_features.set_feature(CPU_AVX512ER);
2995         vm_features.set_feature(CPU_AVX512BW);
2996         vm_features.set_feature(CPU_AVX512VL);
2997         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2998         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2999         vm_features.set_feature(CPU_AVX512_VAES);
3000         vm_features.set_feature(CPU_AVX512_VNNI);
3001         vm_features.set_feature(CPU_AVX512_BITALG);
3002         vm_features.set_feature(CPU_AVX512_VBMI);
3003         vm_features.set_feature(CPU_AVX512_VBMI2);
3004         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3005           vm_features.set_feature(CPU_AVX10_2);
3006         }
3007       }
3008     }
3009   }
3010 
3011   if (std_cpuid1_ecx.bits.hv != 0)
3012     vm_features.set_feature(CPU_HV);
3013   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3014     vm_features.set_feature(CPU_BMI1);
3015   if (std_cpuid1_edx.bits.tsc != 0)
3016     vm_features.set_feature(CPU_TSC);
3017   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3018     vm_features.set_feature(CPU_TSCINV_BIT);
3019   if (std_cpuid1_ecx.bits.aes != 0)
3020     vm_features.set_feature(CPU_AES);
3021   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3022     vm_features.set_feature(CPU_LZCNT);
3023   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3024     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3025   if (sef_cpuid7_ebx.bits.erms != 0)
3026     vm_features.set_feature(CPU_ERMS);
3027   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3028     vm_features.set_feature(CPU_FSRM);
3029   if (std_cpuid1_ecx.bits.clmul != 0)
3030     vm_features.set_feature(CPU_CLMUL);
3031   if (sef_cpuid7_ebx.bits.rtm != 0)
3032     vm_features.set_feature(CPU_RTM);
3033   if (sef_cpuid7_ebx.bits.adx != 0)
3034      vm_features.set_feature(CPU_ADX);
3035   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3036     vm_features.set_feature(CPU_BMI2);
3037   if (sef_cpuid7_ebx.bits.sha != 0)
3038     vm_features.set_feature(CPU_SHA);
3039   if (std_cpuid1_ecx.bits.fma != 0)
3040     vm_features.set_feature(CPU_FMA);
3041   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3042     vm_features.set_feature(CPU_FLUSHOPT);
3043   if (sef_cpuid7_ebx.bits.clwb != 0)
3044     vm_features.set_feature(CPU_CLWB);
3045   if (ext_cpuid1_edx.bits.rdtscp != 0)
3046     vm_features.set_feature(CPU_RDTSCP);
3047   if (sef_cpuid7_ecx.bits.rdpid != 0)
3048     vm_features.set_feature(CPU_RDPID);
3049 
3050   // AMD|Hygon additional features.
3051   if (is_amd_family()) {
3052     // PREFETCHW was checked above, check TDNOW here.
3053     if ((ext_cpuid1_edx.bits.tdnow != 0))
3054       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3055     if (ext_cpuid1_ecx.bits.sse4a != 0)
3056       vm_features.set_feature(CPU_SSE4A);
3057   }
3058 
3059   // Intel additional features.
3060   if (is_intel()) {
3061     if (sef_cpuid7_edx.bits.serialize != 0)
3062       vm_features.set_feature(CPU_SERIALIZE);
3063     if (sef_cpuid7_edx.bits.hybrid != 0)
3064       vm_features.set_feature(CPU_HYBRID);
3065     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3066       vm_features.set_feature(CPU_AVX512_FP16);
3067   }
3068 
3069   // ZX additional features.
3070   if (is_zx()) {
3071     // We do not know if these are supported by ZX, so we cannot trust
3072     // common CPUID bit for them.
3073     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3074     vm_features.clear_feature(CPU_CLWB);
3075   }
3076 
3077   // Protection key features.
3078   if (sef_cpuid7_ecx.bits.pku != 0) {
3079     vm_features.set_feature(CPU_PKU);
3080   }
3081   if (sef_cpuid7_ecx.bits.ospke != 0) {
3082     vm_features.set_feature(CPU_OSPKE);
3083   }
3084 
3085   // Control flow enforcement (CET) features.
3086   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3087     vm_features.set_feature(CPU_CET_SS);
3088   }
3089   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3090     vm_features.set_feature(CPU_CET_IBT);
3091   }
3092 
3093   // Composite features.
3094   if (supports_tscinv_bit() &&
3095       ((is_amd_family() && !is_amd_Barcelona()) ||
3096        is_intel_tsc_synched_at_init())) {
3097     vm_features.set_feature(CPU_TSCINV);
3098   }
3099   return vm_features;
3100 }
3101 
3102 bool VM_Version::os_supports_avx_vectors() {
3103   bool retVal = false;
3104   int nreg = 4;
3105   if (supports_evex()) {
3106     // Verify that OS save/restore all bits of EVEX registers
3107     // during signal processing.
3108     retVal = true;
3109     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3110       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3111         retVal = false;
3112         break;
3113       }
3114     }
3115   } else if (supports_avx()) {
3116     // Verify that OS save/restore all bits of AVX registers
3117     // during signal processing.
3118     retVal = true;
3119     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3120       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3121         retVal = false;
3122         break;
3123       }
3124     }
3125     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3126     if (retVal == false) {
3127       // Verify that OS save/restore all bits of EVEX registers
3128       // during signal processing.
3129       retVal = true;
3130       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3131         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3132           retVal = false;
3133           break;
3134         }
3135       }
3136     }
3137   }
3138   return retVal;
3139 }
3140 
3141 bool VM_Version::os_supports_apx_egprs() {
3142   if (!supports_apx_f()) {
3143     return false;
3144   }
3145   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3146       _cpuid_info.apx_save[1] != egpr_test_value()) {
3147     return false;
3148   }
3149   return true;
3150 }
3151 
3152 uint VM_Version::cores_per_cpu() {
3153   uint result = 1;
3154   if (is_intel()) {
3155     bool supports_topology = supports_processor_topology();
3156     if (supports_topology) {
3157       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3158                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3159     }
3160     if (!supports_topology || result == 0) {
3161       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3162     }
3163   } else if (is_amd_family()) {
3164     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3165     if (cpu_family() >= 0x17) { // Zen or later
3166       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3167     }
3168   } else if (is_zx()) {
3169     bool supports_topology = supports_processor_topology();
3170     if (supports_topology) {
3171       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3172                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3173     }
3174     if (!supports_topology || result == 0) {
3175       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3176     }
3177   }
3178   return result;
3179 }
3180 
3181 uint VM_Version::threads_per_core() {
3182   uint result = 1;
3183   if (is_intel() && supports_processor_topology()) {
3184     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3185   } else if (is_zx() && supports_processor_topology()) {
3186     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3187   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3188     if (cpu_family() >= 0x17) {
3189       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3190     } else {
3191       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3192                  cores_per_cpu();
3193     }
3194   }
3195   return (result == 0 ? 1 : result);
3196 }
3197 
3198 uint VM_Version::L1_line_size() {
3199   uint result = 0;
3200   if (is_intel()) {
3201     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3202   } else if (is_amd_family()) {
3203     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3204   } else if (is_zx()) {
3205     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3206   }
3207   if (result < 32) // not defined ?
3208     result = 32;   // 32 bytes by default on x86 and other x64
3209   return result;
3210 }
3211 
3212 bool VM_Version::is_intel_tsc_synched_at_init() {
3213   if (is_intel_family_core()) {
3214     uint32_t ext_model = extended_cpu_model();
3215     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3216         ext_model == CPU_MODEL_WESTMERE_EP    ||
3217         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3218         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3219       // <= 2-socket invariant tsc support. EX versions are usually used
3220       // in > 2-socket systems and likely don't synchronize tscs at
3221       // initialization.
3222       // Code that uses tsc values must be prepared for them to arbitrarily
3223       // jump forward or backward.
3224       return true;
3225     }
3226   }
3227   return false;
3228 }
3229 
3230 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3231   // Hardware prefetching (distance/size in bytes):
3232   // Pentium 3 -  64 /  32
3233   // Pentium 4 - 256 / 128
3234   // Athlon    -  64 /  32 ????
3235   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3236   // Core      - 128 /  64
3237   //
3238   // Software prefetching (distance in bytes / instruction with best score):
3239   // Pentium 3 - 128 / prefetchnta
3240   // Pentium 4 - 512 / prefetchnta
3241   // Athlon    - 128 / prefetchnta
3242   // Opteron   - 256 / prefetchnta
3243   // Core      - 256 / prefetchnta
3244   // It will be used only when AllocatePrefetchStyle > 0
3245 
3246   if (is_amd_family()) { // AMD | Hygon
3247     if (supports_sse2()) {
3248       return 256; // Opteron
3249     } else {
3250       return 128; // Athlon
3251     }
3252   } else { // Intel
3253     if (supports_sse3() && is_intel_server_family()) {
3254       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3255         return 192;
3256       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3257         return 384;
3258       }
3259     }
3260     if (supports_sse2()) {
3261       if (is_intel_server_family()) {
3262         return 256; // Pentium M, Core, Core2
3263       } else {
3264         return 512; // Pentium 4
3265       }
3266     } else {
3267       return 128; // Pentium 3 (and all other old CPUs)
3268     }
3269   }
3270 }
3271 
3272 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3273   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3274   switch (id) {
3275   case vmIntrinsics::_floatToFloat16:
3276   case vmIntrinsics::_float16ToFloat:
3277     if (!supports_float16()) {
3278       return false;
3279     }
3280     break;
3281   default:
3282     break;
3283   }
3284   return true;
3285 }
3286 
3287 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3288   int i = 0;
3289   ss.join([&]() {
3290     const char* str = nullptr;
3291     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3292       if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3293         str = _features_names[i];
3294       }
3295       i += 1;
3296     }
3297     return str;
3298   }, ", ");
3299 }
3300 
3301 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3302   VM_Features* features = (VM_Features*)features_buffer;
3303   insert_features_names(*features, ss);
3304 }
3305 
3306 void VM_Version::get_missing_features_name(void* features_buffer, stringStream& ss) {
3307   VM_Features* features_to_test = (VM_Features*)features_buffer;
3308   int i = 0;
3309   ss.join([&]() {
3310     const char* str = nullptr;
3311     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3312       Feature_Flag flag = (Feature_Flag)i;
3313       if (features_to_test->supports_feature(flag) && !_features.supports_feature(flag)) {
3314         str = _features_names[i];
3315       }
3316       i += 1;
3317     }
3318     return str;
3319   }, ", ");
3320 }
3321 
3322 int VM_Version::cpu_features_size() {
3323   return sizeof(VM_Features);
3324 }
3325 
3326 void VM_Version::store_cpu_features(void* buf) {
3327   VM_Features copy = _features;
3328   copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3329   memcpy(buf, &copy, sizeof(VM_Features));
3330 }
3331 
3332 bool VM_Version::supports_features(void* features_buffer) {
3333   VM_Features* features_to_test = (VM_Features*)features_buffer;
3334   return _features.supports_features(features_to_test);
3335 }