/*
 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/ostream.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
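// These pointers are filled in with the entry points of stubs generated at
// startup by VM_Version_StubGenerator (below), and are then called like
// plain C functions, e.g. get_cpu_info_stub(&_cpuid_info) in
// get_processor_features().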

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  return true;
}

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
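
// CPUID convention: the leaf (function) number goes in EAX and, for leaves
// with sub-leaves, the sub-leaf number goes in ECX; results come back in
// EAX/EBX/ECX/EDX. For example, leaf 0x0 returns the highest supported
// standard leaf in EAX and the vendor string in EBX, EDX, ECX.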

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31 during
    // signal handling guarantees that register values preserved across the signal
    // were re-instantiated by the operating system, not merely left unmodified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // Clear the EGPR test state (r16/r31).
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
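    // Detection works by toggling EFLAGS bits that old CPUs lack: the AC flag
    // (bit 18, 0x40000) cannot be set on a 386, and the ID flag (bit 21,
    // 0x200000) can only be toggled on CPUs that implement CPUID. The pattern
    // used below is: pushf; pop rax; xor rax, FLAG; push rax; popf; pushf;
    // pop rax -- if the bit did not stick, the capability is absent.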
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, rsi, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);
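    // Leaf 0xB marks a sub-leaf invalid by returning zero in both EAX[4:0]
    // (the APIC ID shift) and EBX[15:0] (the logical processor count); the
    // AND/OR/AND sequence above folds those two fields into one zero/non-zero
    // test while EAX itself is preserved on the stack.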

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
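    // The masks below encode those two bits directly: 0x200000 == 1 << 21
    // (APX_F in CPUID.(EAX=7,ECX=1):EDX) and 0x80000 == 1 << 19 (the
    // extended-GPR state component in XCR0).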
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if the OS saves/restores SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of YMM/ZMM
    // registers are not restored after signal processing.
    // Generate SEGV here (reference through null)
    // and check the upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
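      // Together these XCR0 components form the mask 0xE0: opmask (bit 5,
      // 0x20), zmm512 (bit 6, 0x40) and zmm32 (bit 7, 0x80), i.e. full
      // AVX-512 state save/restore support in the OS.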
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporarily to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporarily to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG' ("Genu" stored little-endian)
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // the uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, rsi, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi, 0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features; // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  _supports_atomic_getset8 = true;
  _supports_atomic_getadd8 = true;

  // The OS should support SSE for x64, and the hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // In 64-bit mode, SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;

  // flush_icache_stub has to be generated first.
  // That is why the ICache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in a 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is the size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");

  // Assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero.
  // It is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // Publish the data cache line flush size to the generic field; otherwise,
    // let it default to zero, thereby disabling writeback.
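    // e.g. the clflush_size of 8 quadwords guaranteed above gives
    // 8 * 8 = 64-byte cache lines, the common case on modern x86.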
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }

  // Check if the processor has Intel E-cores
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
      _model == 0xCC || _model == 0xDD)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features.clear_feature(CPU_SSE4_1);
    _features.clear_feature(CPU_SSE4_2);
  }

  if (UseSSE < 3) {
    _features.clear_feature(CPU_SSE3);
    _features.clear_feature(CPU_SSSE3);
    _features.clear_feature(CPU_SSE4A);
  }

  if (UseSSE < 2)
    _features.clear_feature(CPU_SSE2);

  if (UseSSE < 1)
    _features.clear_feature(CPU_SSE);

  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features.clear_feature(CPU_AVX512F);
    _features.clear_feature(CPU_AVX512DQ);
    _features.clear_feature(CPU_AVX512CD);
    _features.clear_feature(CPU_AVX512BW);
    _features.clear_feature(CPU_AVX512ER);
    _features.clear_feature(CPU_AVX512PF);
    _features.clear_feature(CPU_AVX512VL);
    _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
    _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
    _features.clear_feature(CPU_AVX512_VAES);
    _features.clear_feature(CPU_AVX512_VNNI);
    _features.clear_feature(CPU_AVX512_VBMI);
    _features.clear_feature(CPU_AVX512_VBMI2);
    _features.clear_feature(CPU_AVX512_BITALG);
    _features.clear_feature(CPU_AVX512_IFMA);
    _features.clear_feature(CPU_APX_F);
    _features.clear_feature(CPU_AVX512_FP16);
    _features.clear_feature(CPU_AVX10_1);
    _features.clear_feature(CPU_AVX10_2);
  }

  // Currently, APX support is only enabled for targets supporting the AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  }

  if (!UseAPX) {
    _features.clear_feature(CPU_APX_F);
  }

  if (UseAVX < 2) {
    _features.clear_feature(CPU_AVX2);
    _features.clear_feature(CPU_AVX_IFMA);
  }

  if (UseAVX < 1) {
    _features.clear_feature(CPU_AVX);
    _features.clear_feature(CPU_VZEROUPPER);
    _features.clear_feature(CPU_F16C);
    _features.clear_feature(CPU_SHA512);
  }

  if (logical_processors_per_package() == 1) {
    // An HT processor could be installed on a system which doesn't support HT.
    _features.clear_feature(CPU_HT);
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features.clear_feature(CPU_VZEROUPPER);
      _features.clear_feature(CPU_AVX512BW);
      _features.clear_feature(CPU_AVX512VL);
      _features.clear_feature(CPU_AVX512DQ);
      _features.clear_feature(CPU_AVX512_VNNI);
      _features.clear_feature(CPU_AVX512_VAES);
      _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
      _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
      _features.clear_feature(CPU_AVX512_VBMI);
      _features.clear_feature(CPU_AVX512_VBMI2);
      _features.clear_feature(CPU_CLWB);
      _features.clear_feature(CPU_FLUSHOPT);
      _features.clear_feature(CPU_GFNI);
      _features.clear_feature(CPU_AVX512_BITALG);
      _features.clear_feature(CPU_AVX512_IFMA);
      _features.clear_feature(CPU_AVX_IFMA);
      _features.clear_feature(CPU_AVX512_FP16);
      _features.clear_feature(CPU_AVX10_1);
      _features.clear_feature(CPU_AVX10_2);
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
    FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }

  assert(supports_clflush(), "Always present");
  if (X86ICacheSync == -1) {
    // Auto-detect, choosing the most performant option that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
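    // The X86ICacheSync values used below map to the flushing instruction:
    // 1 = CLFLUSH, 2 = CLFLUSHOPT, 3 = CLWB, 4 = CPUID, 5 = SERIALIZE.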
    if (supports_clwb()) {
      FLAG_SET_ERGO(X86ICacheSync, 3);
    } else if (supports_clflushopt()) {
      FLAG_SET_ERGO(X86ICacheSync, 2);
    } else {
      FLAG_SET_ERGO(X86ICacheSync, 1);
    }
  } else {
    if ((X86ICacheSync == 2) && !supports_clflushopt()) {
      vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
    }
    if ((X86ICacheSync == 3) && !supports_clwb()) {
      vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
    }
    if ((X86ICacheSync == 5) && !supports_serialize()) {
      vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
    }
  }

  stringStream ss(2048);
  if (supports_hybrid()) {
    ss.print("(hybrid)");
  } else {
    ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
  }
  ss.print(" family %d model %d stepping %d microcode 0x%x",
           cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  ss.print(", ");
  int features_offset = (int)ss.size();
  insert_features_names(_features, ss);

  _cpu_info_string = ss.as_string(true);
  _features_string = _cpu_info_string + features_offset;

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 intrinsics require AVX2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function.  StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
      if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
          UseChaCha20Intrinsics = true;
      }
  } else if (UseChaCha20Intrinsics) {
      if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
          warning("ChaCha20 intrinsic requires AVX instructions");
      }
      FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }

  // Kyber Intrinsics
  // Currently we only have them for AVX512
#ifdef _LP64
  if (supports_evex() && supports_avx512bw()) {
      if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
          UseKyberIntrinsics = true;
      }
  } else
#endif
  if (UseKyberIntrinsics) {
      warning("Intrinsics for ML-KEM are not available on this CPU.");
      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
  }

  // Dilithium Intrinsics
  // Currently we only have them for AVX512
  if (supports_evex() && supports_avx512bw()) {
      if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
          UseDilithiumIntrinsics = true;
      }
  } else if (UseDilithiumIntrinsics) {
      warning("Intrinsics for ML-DSA are not available on this CPU.");
      FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsics require AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma()) {
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() || (supports_avx2() && supports_bmi2())) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

  if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (supports_evex() && supports_avx512bw()) {
      if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
          UseSHA3Intrinsics = true;
      }
  } else if (UseSHA3Intrinsics) {
      warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
      FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX-512 (UseAVX > 2)
    max_vector_size = 64;
  }

  int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64-bit platforms

  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    if (MaxVectorSize < min_vector_size) {
      warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
    }
    if (MaxVectorSize > max_vector_size) {
      warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
    if (!is_power_of_2(MaxVectorSize)) {
      warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
      FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
    }
  } else {
    // If default, use highest supported configuration
    FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
  }

#if defined(COMPILER2) && defined(ASSERT)
  if (MaxVectorSize > 0) {
    if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
      int nreg = 4;
      const char* ymm_name[4] = {"0", "7", "8", "15"};
      for (int i = 0; i < nreg; i++) {
        tty->print("YMM%s:", ymm_name[i]);
        for (int j = 7; j >= 0; j--) {
          tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
        }
        tty->cr();
      }
    }
  }
#endif // COMPILER2 && ASSERT

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
      FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
    }
  } else if (UsePoly1305Intrinsics) {
    warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
  }

  if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
    if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
      FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
    }
  } else if (UseIntPolyIntrinsics) {
    warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
  }

  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
    UseMultiplyToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
    UseSquareToLenIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
    UseMulAddIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
    UseMontgomeryMultiplyIntrinsic = true;
  }
  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
    UseMontgomerySquareIntrinsic = true;
  }
#endif // COMPILER2_OR_JVMCI

  // On newer CPUs, instructions which update the whole XMM register should be
  // used to prevent partial register stalls due to dependencies on the high half.
  //
  // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
  // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
  // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
  // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).

1438 
1439   if (is_zx()) { // ZX cpus specific settings
1440     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1441       UseStoreImmI16 = false; // don't use it on ZX cpus
1442     }
1443     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1444       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1445         // Use it on all ZX cpus
1446         UseAddressNop = true;
1447       }
1448     }
1449     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1450       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1451     }
1452     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1453       if (supports_sse3()) {
1454         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1455       } else {
1456         UseXmmRegToRegMoveAll = false;
1457       }
1458     }
1459     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1460 #ifdef COMPILER2
1461       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1462         // For newer ZX CPUs, apply the following optimization:
1463         // don't align the beginning of a loop if there are enough instructions
1464         // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
1465         // in the current fetch line (OptoLoopAlignment), or if the padding
1466         // is big (> MaxLoopPad).
1467         // Set MaxLoopPad to 11 for newer ZX CPUs to reduce the number of
1468         // generated NOP instructions. 11 is the largest size of one
1469         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1470         MaxLoopPad = 11;
1471       }
1472 #endif // COMPILER2
1473       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1474         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1475       }
1476       if (supports_sse4_2()) { // new ZX cpus
1477         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1478           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1479         }
1480       }
1481     }
1482 
1483     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1484       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1485     }
1486   }
1487 
1488   if (is_amd_family()) { // AMD cpus specific settings
1489     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1490       // Use it on new AMD cpus starting from Opteron.
1491       UseAddressNop = true;
1492     }
1493     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1494       // Use it on new AMD cpus starting from Opteron.
1495       UseNewLongLShift = true;
1496     }
1497     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1498       if (supports_sse4a()) {
1499         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1500       } else {
1501         UseXmmLoadAndClearUpper = false;
1502       }
1503     }
1504     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1505       if (supports_sse4a()) {
1506         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1507       } else {
1508         UseXmmRegToRegMoveAll = false;
1509       }
1510     }
1511     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1512       if (supports_sse4a()) {
1513         UseXmmI2F = true;
1514       } else {
1515         UseXmmI2F = false;
1516       }
1517     }
1518     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1519       if (supports_sse4a()) {
1520         UseXmmI2D = true;
1521       } else {
1522         UseXmmI2D = false;
1523       }
1524     }
1525 
1526     // some defaults for AMD family 15h
1527     if (cpu_family() == 0x15) {
1528       // On family 15h processors default is no sw prefetch
1529       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1530         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1531       }
1532       // Also, if another prefetch style is specified, the default instruction type is PREFETCHW
1533       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1534         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1535       }
1536       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1537       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1538         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1539       }
1540       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1541         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1542       }
1543     }
1544 
1545 #ifdef COMPILER2
1546     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1547       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1548       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1549     }
1550 #endif // COMPILER2
1551 
1552     // Some defaults for AMD family >= 17h && Hygon family 18h
1553     if (cpu_family() >= 0x17) {
1554       // On family >=17h processors use XMM and UnalignedLoadStores
1555       // for Array Copy
1556       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1557         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1558       }
1559       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1560         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1561       }
1562 #ifdef COMPILER2
1563       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1564         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1565       }
1566 #endif
1567     }
1568   }
1569 
1570   if (is_intel()) { // Intel cpus specific settings
1571     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1572       UseStoreImmI16 = false; // don't use it on Intel cpus
1573     }
1574     if (is_intel_server_family() || cpu_family() == 15) {
1575       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1576         // Use it on all Intel cpus starting from PentiumPro
1577         UseAddressNop = true;
1578       }
1579     }
1580     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1581       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1582     }
1583     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1584       if (supports_sse3()) {
1585         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1586       } else {
1587         UseXmmRegToRegMoveAll = false;
1588       }
1589     }
1590     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1591 #ifdef COMPILER2
1592       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1593         // For new Intel CPUs, apply the following optimization:
1594         // don't align the beginning of a loop if there are enough instructions
1595         // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
1596         // in the current fetch line (OptoLoopAlignment), or if the padding
1597         // is big (> MaxLoopPad).
1598         // Set MaxLoopPad to 11 for new Intel CPUs to reduce the number of
1599         // generated NOP instructions. 11 is the largest size of one
1600         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1601         MaxLoopPad = 11;
1602       }
1603 #endif // COMPILER2
1604 
1605       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1606         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1607       }
1608       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1609         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1610           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1611         }
1612       }
1613     }
1614     if (is_atom_family() || is_knights_family()) {
1615 #ifdef COMPILER2
1616       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1617         OptoScheduling = true;
1618       }
1619 #endif
1620       if (supports_sse4_2()) { // Silvermont
1621         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1622           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1623         }
1624       }
1625       if (FLAG_IS_DEFAULT(UseIncDec)) {
1626         FLAG_SET_DEFAULT(UseIncDec, false);
1627       }
1628     }
1629     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1630       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1631     }
1632 #ifdef COMPILER2
1633     if (UseAVX > 2) {
1634       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1635           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1636            ArrayOperationPartialInlineSize != 0 &&
1637            ArrayOperationPartialInlineSize != 16 &&
1638            ArrayOperationPartialInlineSize != 32 &&
1639            ArrayOperationPartialInlineSize != 64)) {
1640         int inline_size = 0;
1641         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1642           inline_size = 64;
1643         } else if (MaxVectorSize >= 32) {
1644           inline_size = 32;
1645         } else if (MaxVectorSize >= 16) {
1646           inline_size = 16;
1647         }
1648         if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1649           warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
1650         }
1651         ArrayOperationPartialInlineSize = inline_size;
1652       }
1653 
1654       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1655         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1656         if (ArrayOperationPartialInlineSize) {
1657           warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize=%zd", MaxVectorSize);
1658         } else {
1659           warning("Setting ArrayOperationPartialInlineSize to %zd", ArrayOperationPartialInlineSize);
1660         }
1661       }
1662     }
1663 #endif
1664   }
1665 
1666 #ifdef COMPILER2
1667   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1668     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1669       OptimizeFill = false;
1670     }
1671   }
1672 #endif
1673   if (supports_sse4_2()) {
1674     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1675       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1676     }
1677   } else {
1678     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1679       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1680     }
1681     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1682   }
1683   if (UseSSE42Intrinsics) {
1684     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1685       UseVectorizedMismatchIntrinsic = true;
1686     }
1687   } else if (UseVectorizedMismatchIntrinsic) {
1688     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1689       warning("vectorizedMismatch intrinsics are not available on this CPU");
1690     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1691   }
1692   if (UseAVX >= 2) {
1693     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1694   } else if (UseVectorizedHashCodeIntrinsic) {
1695     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1696       warning("vectorizedHashCode intrinsics are not available on this CPU");
1697     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1698   }
1699 
1700   // Use the count leading zeros (lzcnt) instruction if available.
1701   if (supports_lzcnt()) {
1702     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1703       UseCountLeadingZerosInstruction = true;
1704     }
1705   } else if (UseCountLeadingZerosInstruction) {
1706     warning("lzcnt instruction is not available on this CPU");
1707     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1708   }
1709 
1710   // Use count trailing zeros instruction if available
1711   if (supports_bmi1()) {
1712     // tzcnt does not require VEX prefix
1713     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1714       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1715         // Don't use tzcnt if BMI1 is switched off on command line.
1716         UseCountTrailingZerosInstruction = false;
1717       } else {
1718         UseCountTrailingZerosInstruction = true;
1719       }
1720     }
1721   } else if (UseCountTrailingZerosInstruction) {
1722     warning("tzcnt instruction is not available on this CPU");
1723     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1724   }
1725 
1726   // BMI instructions (except tzcnt) use an encoding with a VEX prefix.
1727   // The VEX prefix is generated only when UseAVX > 0.
1728   if (supports_bmi1() && supports_avx()) {
1729     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1730       UseBMI1Instructions = true;
1731     }
1732   } else if (UseBMI1Instructions) {
1733     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1734     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1735   }
1736 
1737   if (supports_bmi2() && supports_avx()) {
1738     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1739       UseBMI2Instructions = true;
1740     }
1741   } else if (UseBMI2Instructions) {
1742     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1743     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1744   }
1745 
1746   // Use population count instruction if available.
1747   if (supports_popcnt()) {
1748     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1749       UsePopCountInstruction = true;
1750     }
1751   } else if (UsePopCountInstruction) {
1752     warning("POPCNT instruction is not available on this CPU");
1753     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1754   }
1755 
1756   // Use fast-string operations if available.
1757   if (supports_erms()) {
1758     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1759       UseFastStosb = true;
1760     }
1761   } else if (UseFastStosb) {
1762     warning("fast-string operations are not available on this CPU");
1763     FLAG_SET_DEFAULT(UseFastStosb, false);
1764   }
1765 
1766   // For AMD processors, use XMM/YMM MOVDQU instructions
1767   // for object initialization by default
1768   if (is_amd() && cpu_family() >= 0x19) {
1769     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1770       UseFastStosb = false;
1771     }
1772   }
1773 
1774 #ifdef COMPILER2
1775   if (is_intel() && MaxVectorSize > 16) {
1776     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1777       UseFastStosb = false;
1778     }
1779   }
1780 #endif
1781 
1782   // Use XMM/YMM MOVDQU instruction for Object Initialization
1783   if (!UseFastStosb && UseUnalignedLoadStores) {
1784     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1785       UseXMMForObjInit = true;
1786     }
1787   } else if (UseXMMForObjInit) {
1788     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1789     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1790   }
1791 
1792 #ifdef COMPILER2
1793   if (FLAG_IS_DEFAULT(AlignVector)) {
1794     // Modern processors allow misaligned memory operations for vectors.
1795     AlignVector = !UseUnalignedLoadStores;
1796   }
1797 #endif // COMPILER2
1798 
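       // AllocatePrefetchInstr selects the prefetch instruction:
       // 0 = PREFETCHNTA, 1 = PREFETCHT0, 2 = PREFETCHT2, 3 = PREFETCHW
       // (PREFETCHW requires 3DNow! prefetch support); see the logging code below.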
1799   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1800     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1801       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1802     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1803       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1804     }
1805   }
1806 
1807   // Allocation prefetch settings
1808   int cache_line_size = checked_cast<int>(prefetch_data_size());
1809   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1810       (cache_line_size > AllocatePrefetchStepSize)) {
1811     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1812   }
1813 
1814   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1815     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1816     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1817       warning("AllocatePrefetchDistance is set to 0, which disables prefetching. Ignoring the AllocatePrefetchStyle flag.");
1818     }
1819     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1820   }
1821 
1822   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1823     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1824     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1825   }
1826 
1827   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1828     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1829         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1830       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1831     }
1832 #ifdef COMPILER2
1833     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1834       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1835     }
1836 #endif
1837   }
1838 
1839   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1840 #ifdef COMPILER2
1841     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1842       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1843     }
1844 #endif
1845   }
1846 
1847   // Prefetch settings
1848 
1849   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1850   // 50-warehouse SPECjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
1851   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1852   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1853 
1854   // gc copy/scan is disabled if prefetchw isn't supported, because
1855   // Prefetch::write emits an inlined prefetchw on Linux.
1856   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1857   // The used prefetcht0 instruction works for both amd64 and em64t.
1858 
1859   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1860     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1861   }
1862   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1863     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1864   }
1865 
1866   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1867      (cache_line_size > ContendedPaddingWidth))
1868      ContendedPaddingWidth = cache_line_size;
1869 
1870   // This machine allows unaligned memory accesses
1871   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1872     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1873   }
1874 
1875 #ifndef PRODUCT
1876   if (log_is_enabled(Info, os, cpu)) {
1877     LogStream ls(Log(os, cpu)::info());
1878     outputStream* log = &ls;
1879     log->print_cr("Logical CPUs per core: %u",
1880                   logical_processors_per_package());
1881     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1882     log->print("UseSSE=%d", UseSSE);
1883     if (UseAVX > 0) {
1884       log->print("  UseAVX=%d", UseAVX);
1885     }
1886     if (UseAES) {
1887       log->print("  UseAES=1");
1888     }
1889 #ifdef COMPILER2
1890     if (MaxVectorSize > 0) {
1891       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1892     }
1893 #endif
1894     log->cr();
1895     log->print("Allocation");
1896     if (AllocatePrefetchStyle <= 0) {
1897       log->print_cr(": no prefetching");
1898     } else {
1899       log->print(" prefetching: ");
1900       if (AllocatePrefetchInstr == 0) {
1901         log->print("PREFETCHNTA");
1902       } else if (AllocatePrefetchInstr == 1) {
1903         log->print("PREFETCHT0");
1904       } else if (AllocatePrefetchInstr == 2) {
1905         log->print("PREFETCHT2");
1906       } else if (AllocatePrefetchInstr == 3) {
1907         log->print("PREFETCHW");
1908       }
1909       if (AllocatePrefetchLines > 1) {
1910         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1911       } else {
1912         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1913       }
1914     }
1915 
1916     if (PrefetchCopyIntervalInBytes > 0) {
1917       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1918     }
1919     if (PrefetchScanIntervalInBytes > 0) {
1920       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1921     }
1922     if (ContendedPaddingWidth > 0) {
1923       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1924     }
1925   }
1926 #endif // !PRODUCT
1927   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1928       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1929   }
1930   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1931       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1932   }
1933 }
1934 
1935 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1936   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1937   if (vrt == XenHVM) {
1938     st->print_cr("Xen hardware-assisted virtualization detected");
1939   } else if (vrt == KVM) {
1940     st->print_cr("KVM virtualization detected");
1941   } else if (vrt == VMWare) {
1942     st->print_cr("VMWare virtualization detected");
1943     VirtualizationSupport::print_virtualization_info(st);
1944   } else if (vrt == HyperV) {
1945     st->print_cr("Hyper-V virtualization detected");
1946   } else if (vrt == HyperVRole) {
1947     st->print_cr("Hyper-V role detected");
1948   }
1949 }
1950 
1951 bool VM_Version::compute_has_intel_jcc_erratum() {
1952   if (!is_intel_family_core()) {
1953     // Only Intel CPUs are affected.
1954     return false;
1955   }
1956   // The following table of affected CPUs is based on the following document released by Intel:
1957   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1958   switch (_model) {
1959   case 0x8E:
1960     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1961     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1962     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1963     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1964     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1965     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1966     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1967     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1968     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1969     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1970   case 0x4E:
1971     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1972     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1973     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1974     return _stepping == 0x3;
1975   case 0x55:
1976     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1977     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1978     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1979     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1980     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1981     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1982     return _stepping == 0x4 || _stepping == 0x7;
1983   case 0x5E:
1984     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1985     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1986     return _stepping == 0x3;
1987   case 0x9E:
1988     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1989     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1990     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1991     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1992     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1993     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1994     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1995     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1996     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1997     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1998     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1999     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2000     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
2001     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2002     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2003   case 0xA5:
2004     // Not in Intel documentation.
2005     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2006     return true;
2007   case 0xA6:
2008     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2009     return _stepping == 0x0;
2010   case 0xAE:
2011     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2012     return _stepping == 0xA;
2013   default:
2014     // If we are running on another Intel machine not recognized in the table, we are okay.
2015     return false;
2016   }
2017 }
2018 
2019 // On Xen, the cpuid instruction returns
2020 //  eax / registers[0]: Version of Xen
2021 //  ebx / registers[1]: chars 'XenV'
2022 //  ecx / registers[2]: chars 'MMXe'
2023 //  edx / registers[3]: chars 'nVMM'
2024 //
2025 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2026 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2027 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2028 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2029 //
2030 // more information :
2031 // https://kb.vmware.com/s/article/1009458
2032 //
2033 void VM_Version::check_virtualizations() {
2034   uint32_t registers[4] = {0};
2035   char signature[13] = {0};
2036 
2037   // Xen cpuid leaves can be found on 0x100-aligned boundaries starting
2038   // from 0x40000000 up to 0x40010000.
2039   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2040   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2041     detect_virt_stub(leaf, registers);
2042     memcpy(signature, &registers[1], 12);
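         // registers[1..3] (EBX, ECX, EDX) concatenate into the 12-character
         // hypervisor vendor signature, e.g. "XenVMMXenVMM".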
2043 
2044     if (strncmp("VMwareVMware", signature, 12) == 0) {
2045       Abstract_VM_Version::_detected_virtualization = VMWare;
2046       // check for extended metrics from guestlib
2047       VirtualizationSupport::initialize();
2048     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2049       Abstract_VM_Version::_detected_virtualization = HyperV;
2050 #ifdef _WINDOWS
2051       // CPUID leaf 0x40000007 is available to the root partition only.
2052       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2053       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2054       detect_virt_stub(0x40000007, registers);
2055       if ((registers[0] != 0x0) ||
2056           (registers[1] != 0x0) ||
2057           (registers[2] != 0x0) ||
2058           (registers[3] != 0x0)) {
2059         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2060       }
2061 #endif
2062     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2063       Abstract_VM_Version::_detected_virtualization = KVM;
2064     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2065       Abstract_VM_Version::_detected_virtualization = XenHVM;
2066     }
2067   }
2068 }
2069 
2070 #ifdef COMPILER2
2071 // Determine if it's running on Cascade Lake using default options.
2072 bool VM_Version::is_default_intel_cascade_lake() {
2073   return FLAG_IS_DEFAULT(UseAVX) &&
2074          FLAG_IS_DEFAULT(MaxVectorSize) &&
2075          UseAVX > 2 &&
2076          is_intel_cascade_lake();
2077 }
2078 #endif
2079 
2080 bool VM_Version::is_intel_cascade_lake() {
2081   return is_intel_skylake() && _stepping >= 5;
2082 }
2083 
2084 // avx3_threshold() returns the threshold at which 64-byte instructions are used
2085 // for implementing the array copy and clear operations.
2086 // Intel platforms that support the serialize instruction have an improved
2087 // implementation of 64-byte load/stores, so the default threshold is set
2088 // to 0 for these platforms.
2089 int VM_Version::avx3_threshold() {
2090   return (is_intel_server_family() &&
2091           supports_serialize() &&
2092           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2093 }
2094 
2095 void VM_Version::clear_apx_test_state() {
2096   clear_apx_test_state_stub();
2097 }
2098 
2099 static bool _vm_version_initialized = false;
2100 
2101 void VM_Version::initialize() {
2102   ResourceMark rm;
2103 
2104   // Creating this stub must be the FIRST use of the assembler
2105   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2106   if (stub_blob == nullptr) {
2107     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2108   }
2109   CodeBuffer c(stub_blob);
2110   VM_Version_StubGenerator g(&c);
2111 
2112   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2113                                      g.generate_get_cpu_info());
2114   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2115                                      g.generate_detect_virt());
2116   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2117                                      g.clear_apx_test_state());
2118   get_processor_features();
2119 
2120   Assembler::precompute_instructions();
2121 
2122   if (VM_Version::supports_hv()) { // Supports hypervisor
2123     check_virtualizations();
2124   }
2125   _vm_version_initialized = true;
2126 }
2127 
2128 typedef enum {
2129    CPU_FAMILY_8086_8088  = 0,
2130    CPU_FAMILY_INTEL_286  = 2,
2131    CPU_FAMILY_INTEL_386  = 3,
2132    CPU_FAMILY_INTEL_486  = 4,
2133    CPU_FAMILY_PENTIUM    = 5,
2134    CPU_FAMILY_PENTIUMPRO = 6,    // Same family, several models
2135    CPU_FAMILY_PENTIUM_4  = 0xF
2136 } FamilyFlag;
2137 
2138 typedef enum {
2139   RDTSCP_FLAG  = 0x08000000, // bit 27
2140   INTEL64_FLAG = 0x20000000  // bit 29
2141 } _featureExtendedEdxFlag;
2142 
2143 typedef enum {
2144    FPU_FLAG     = 0x00000001,
2145    VME_FLAG     = 0x00000002,
2146    DE_FLAG      = 0x00000004,
2147    PSE_FLAG     = 0x00000008,
2148    TSC_FLAG     = 0x00000010,
2149    MSR_FLAG     = 0x00000020,
2150    PAE_FLAG     = 0x00000040,
2151    MCE_FLAG     = 0x00000080,
2152    CX8_FLAG     = 0x00000100,
2153    APIC_FLAG    = 0x00000200,
2154    SEP_FLAG     = 0x00000800,
2155    MTRR_FLAG    = 0x00001000,
2156    PGE_FLAG     = 0x00002000,
2157    MCA_FLAG     = 0x00004000,
2158    CMOV_FLAG    = 0x00008000,
2159    PAT_FLAG     = 0x00010000,
2160    PSE36_FLAG   = 0x00020000,
2161    PSNUM_FLAG   = 0x00040000,
2162    CLFLUSH_FLAG = 0x00080000,
2163    DTS_FLAG     = 0x00200000,
2164    ACPI_FLAG    = 0x00400000,
2165    MMX_FLAG     = 0x00800000,
2166    FXSR_FLAG    = 0x01000000,
2167    SSE_FLAG     = 0x02000000,
2168    SSE2_FLAG    = 0x04000000,
2169    SS_FLAG      = 0x08000000,
2170    HTT_FLAG     = 0x10000000,
2171    TM_FLAG      = 0x20000000
2172 } FeatureEdxFlag;
2173 
2174 static BufferBlob* cpuid_brand_string_stub_blob;
2175 static const int   cpuid_brand_string_stub_size = 550;
2176 
2177 extern "C" {
2178   typedef void (*getCPUIDBrandString_stub_t)(void*);
2179 }
2180 
2181 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2182 
2183 // VM_Version statics
2184 enum {
2185   ExtendedFamilyIdLength_INTEL = 16,
2186   ExtendedFamilyIdLength_AMD   = 24
2187 };
2188 
2189 const size_t VENDOR_LENGTH = 13;
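     // Extended brand string: 3 CPUID leaves x 4 registers x 4 bytes each, plus a terminating NUL.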
2190 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2191 static char* _cpu_brand_string = nullptr;
2192 static int64_t _max_qualified_cpu_frequency = 0;
2193 
2194 static int _no_of_threads = 0;
2195 static int _no_of_cores = 0;
2196 
2197 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2198   "8086/8088",
2199   "",
2200   "286",
2201   "386",
2202   "486",
2203   "Pentium",
2204   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2205   "",
2206   "",
2207   "",
2208   "",
2209   "",
2210   "",
2211   "",
2212   "",
2213   "Pentium 4"
2214 };
2215 
2216 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2217   "",
2218   "",
2219   "",
2220   "",
2221   "5x86",
2222   "K5/K6",
2223   "Athlon/AthlonXP",
2224   "",
2225   "",
2226   "",
2227   "",
2228   "",
2229   "",
2230   "",
2231   "",
2232   "Opteron/Athlon64",
2233   "Opteron QC/Phenom",  // Barcelona et.al.
2234   "",
2235   "",
2236   "",
2237   "",
2238   "",
2239   "",
2240   "Zen"
2241 };
2242 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2243 // September 2013, Vol 3C Table 35-1
2244 const char* const _model_id_pentium_pro[] = {
2245   "",
2246   "Pentium Pro",
2247   "",
2248   "Pentium II model 3",
2249   "",
2250   "Pentium II model 5/Xeon/Celeron",
2251   "Celeron",
2252   "Pentium III/Pentium III Xeon",
2253   "Pentium III/Pentium III Xeon",
2254   "Pentium M model 9",    // Yonah
2255   "Pentium III, model A",
2256   "Pentium III, model B",
2257   "",
2258   "Pentium M model D",    // Dothan
2259   "",
2260   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2261   "",
2262   "",
2263   "",
2264   "",
2265   "",
2266   "",
2267   "Celeron",              // 0x16 Celeron 65nm
2268   "Core 2",               // 0x17 Penryn / Harpertown
2269   "",
2270   "",
2271   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2272   "Atom",                 // 0x1B Z5xx series Silverthorn
2273   "",
2274   "Core 2",               // 0x1D Dunnington (6-core)
2275   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2276   "",
2277   "",
2278   "",
2279   "",
2280   "",
2281   "",
2282   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2283   "",
2284   "",
2285   "",                     // 0x28
2286   "",
2287   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2288   "",
2289   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2290   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2291   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2292   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2293   "",
2294   "",
2295   "",
2296   "",
2297   "",
2298   "",
2299   "",
2300   "",
2301   "",
2302   "",
2303   "Ivy Bridge",           // 0x3a
2304   "",
2305   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2306   "",                     // 0x3d "Next Generation Intel Core Processor"
2307   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2308   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2309   "",
2310   "",
2311   "",
2312   "",
2313   "",
2314   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2315   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2316   nullptr
2317 };
2318 
2319 /* Brand ID is for backward compatibility.
2320  * Newer CPUs use the extended brand string. */
2321 const char* const _brand_id[] = {
2322   "",
2323   "Celeron processor",
2324   "Pentium III processor",
2325   "Intel Pentium III Xeon processor",
2326   "",
2327   "",
2328   "",
2329   "",
2330   "Intel Pentium 4 processor",
2331   nullptr
2332 };
2333 
2334 
2335 const char* const _feature_edx_id[] = {
2336   "On-Chip FPU",
2337   "Virtual Mode Extensions",
2338   "Debugging Extensions",
2339   "Page Size Extensions",
2340   "Time Stamp Counter",
2341   "Model Specific Registers",
2342   "Physical Address Extension",
2343   "Machine Check Exceptions",
2344   "CMPXCHG8B Instruction",
2345   "On-Chip APIC",
2346   "",
2347   "Fast System Call",
2348   "Memory Type Range Registers",
2349   "Page Global Enable",
2350   "Machine Check Architecture",
2351   "Conditional Mov Instruction",
2352   "Page Attribute Table",
2353   "36-bit Page Size Extension",
2354   "Processor Serial Number",
2355   "CLFLUSH Instruction",
2356   "",
2357   "Debug Trace Store feature",
2358   "ACPI registers in MSR space",
2359   "Intel Architecture MMX Technology",
2360   "Fast Float Point Save and Restore",
2361   "Streaming SIMD extensions",
2362   "Streaming SIMD extensions 2",
2363   "Self-Snoop",
2364   "Hyper Threading",
2365   "Thermal Monitor",
2366   "",
2367   "Pending Break Enable"
2368 };
2369 
2370 const char* const _feature_extended_edx_id[] = {
2371   "",
2372   "",
2373   "",
2374   "",
2375   "",
2376   "",
2377   "",
2378   "",
2379   "",
2380   "",
2381   "",
2382   "SYSCALL/SYSRET",
2383   "",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "Execute Disable Bit",
2392   "",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "RDTSCP",
2399   "",
2400   "Intel 64 Architecture",
2401   "",
2402   ""
2403 };
2404 
2405 const char* const _feature_ecx_id[] = {
2406   "Streaming SIMD Extensions 3",
2407   "PCLMULQDQ",
2408   "64-bit DS Area",
2409   "MONITOR/MWAIT instructions",
2410   "CPL Qualified Debug Store",
2411   "Virtual Machine Extensions",
2412   "Safer Mode Extensions",
2413   "Enhanced Intel SpeedStep technology",
2414   "Thermal Monitor 2",
2415   "Supplemental Streaming SIMD Extensions 3",
2416   "L1 Context ID",
2417   "",
2418   "Fused Multiply-Add",
2419   "CMPXCHG16B",
2420   "xTPR Update Control",
2421   "Perfmon and Debug Capability",
2422   "",
2423   "Process-context identifiers",
2424   "Direct Cache Access",
2425   "Streaming SIMD extensions 4.1",
2426   "Streaming SIMD extensions 4.2",
2427   "x2APIC",
2428   "MOVBE",
2429   "Popcount instruction",
2430   "TSC-Deadline",
2431   "AESNI",
2432   "XSAVE",
2433   "OSXSAVE",
2434   "AVX",
2435   "F16C",
2436   "RDRAND",
2437   ""
2438 };
2439 
2440 const char* const _feature_extended_ecx_id[] = {
2441   "LAHF/SAHF instruction support",
2442   "Core multi-processor legacy mode",
2443   "",
2444   "",
2445   "",
2446   "Advanced Bit Manipulations: LZCNT",
2447   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2448   "Misaligned SSE mode",
2449   "",
2450   "",
2451   "",
2452   "",
2453   "",
2454   "",
2455   "",
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   ""
2473 };
2474 
2475 void VM_Version::initialize_tsc(void) {
2476   ResourceMark rm;
2477 
2478   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2479   if (cpuid_brand_string_stub_blob == nullptr) {
2480     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2481   }
2482   CodeBuffer c(cpuid_brand_string_stub_blob);
2483   VM_Version_StubGenerator g(&c);
2484   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2485                                    g.generate_getCPUIDBrandString());
2486 }
2487 
2488 const char* VM_Version::cpu_model_description(void) {
2489   uint32_t cpu_family = extended_cpu_family();
2490   uint32_t cpu_model = extended_cpu_model();
2491   const char* model = nullptr;
2492 
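       // Walk the table up to the model index; the nullptr sentinel bounds the
       // lookup for models beyond the end of the table.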
2493   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2494     for (uint32_t i = 0; i <= cpu_model; i++) {
2495       model = _model_id_pentium_pro[i];
2496       if (model == nullptr) {
2497         break;
2498       }
2499     }
2500   }
2501   return model;
2502 }
2503 
2504 const char* VM_Version::cpu_brand_string(void) {
2505   if (_cpu_brand_string == nullptr) {
2506     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2507     if (nullptr == _cpu_brand_string) {
2508       return nullptr;
2509     }
2510     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2511     if (ret_val != OS_OK) {
2512       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2513       _cpu_brand_string = nullptr;
2514     }
2515   }
2516   return _cpu_brand_string;
2517 }
2518 
2519 const char* VM_Version::cpu_brand(void) {
2520   const char*  brand  = nullptr;
2521 
2522   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2523     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2524     brand = _brand_id[0];
2525     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2526       brand = _brand_id[i];
2527     }
2528   }
2529   return brand;
2530 }
2531 
2532 bool VM_Version::cpu_is_em64t(void) {
2533   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2534 }
2535 
2536 bool VM_Version::is_netburst(void) {
2537   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2538 }
2539 
2540 bool VM_Version::supports_tscinv_ext(void) {
2541   if (!supports_tscinv_bit()) {
2542     return false;
2543   }
2544 
2545   if (is_intel()) {
2546     return true;
2547   }
2548 
2549   if (is_amd()) {
2550     return !is_amd_Barcelona();
2551   }
2552 
2553   if (is_hygon()) {
2554     return true;
2555   }
2556 
2557   return false;
2558 }
2559 
2560 void VM_Version::resolve_cpu_information_details(void) {
2561 
2562   // In the future we want to base this information on proper CPU
2563   // and cache topology enumeration, such as:
2564   // Intel 64 Architecture Processor Topology Enumeration,
2565   // which supports system CPU and cache topology enumeration
2566   // using either x2APIC IDs or initial APIC IDs.
2567 
2568   // Currently we only make rough CPU information estimates,
2569   // which will not necessarily reflect the exact configuration of the system.
2570 
2571   // this is the number of logical hardware threads
2572   // visible to the operating system
2573   _no_of_threads = os::processor_count();
2574 
2575   // find out number of threads per cpu package
2576   int threads_per_package = threads_per_core() * cores_per_cpu();
2577 
2578   // use the number of threads visible to the process to estimate the number of sockets
2579   _no_of_sockets = _no_of_threads / threads_per_package;
2580 
2581   // The process might only see a subset of the total number of threads
2582   // from a single processor package, e.g. due to virtualization or resource management.
2583   // If so, then just report a hard-coded 1 as the number of packages.
2584   if (0 == _no_of_sockets) {
2585     _no_of_sockets = 1;
2586   }
2587 
2588   // estimate the number of cores
2589   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2590 }
2591 
2592 
2593 const char* VM_Version::cpu_family_description(void) {
2594   int cpu_family_id = extended_cpu_family();
2595   if (is_amd()) {
2596     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2597       return _family_id_amd[cpu_family_id];
2598     }
2599   }
2600   if (is_intel()) {
2601     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2602       return cpu_model_description();
2603     }
2604     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2605       return _family_id_intel[cpu_family_id];
2606     }
2607   }
2608   if (is_hygon()) {
2609     return "Dhyana";
2610   }
2611   return "Unknown x86";
2612 }
2613 
2614 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2615   assert(buf != nullptr, "buffer is null!");
2616   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should be at least CPU_TYPE_DESC_BUF_SIZE!");
2617 
2618   const char* cpu_type = nullptr;
2619   const char* x64 = nullptr;
2620 
2621   if (is_intel()) {
2622     cpu_type = "Intel";
2623     x64 = cpu_is_em64t() ? " Intel64" : "";
2624   } else if (is_amd()) {
2625     cpu_type = "AMD";
2626     x64 = cpu_is_em64t() ? " AMD64" : "";
2627   } else if (is_hygon()) {
2628     cpu_type = "Hygon";
2629     x64 = cpu_is_em64t() ? " AMD64" : "";
2630   } else {
2631     cpu_type = "Unknown x86";
2632     x64 = cpu_is_em64t() ? " x86_64" : "";
2633   }
2634 
2635   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2636     cpu_type,
2637     cpu_family_description(),
2638     supports_ht() ? " (HT)" : "",
2639     supports_sse3() ? " SSE3" : "",
2640     supports_ssse3() ? " SSSE3" : "",
2641     supports_sse4_1() ? " SSE4.1" : "",
2642     supports_sse4_2() ? " SSE4.2" : "",
2643     supports_sse4a() ? " SSE4A" : "",
2644     is_netburst() ? " Netburst" : "",
2645     is_intel_family_core() ? " Core" : "",
2646     x64);
2647 
2648   return OS_OK;
2649 }
2650 
2651 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2652   assert(buf != nullptr, "buffer is null!");
2653   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should be at least CPU_EBS_MAX_LENGTH!");
2654   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2655 
2656   // invoke newly generated asm code to fetch CPU Brand String
2657   getCPUIDBrandString_stub(&_cpuid_info);
2658 
2659   // fetch results into buffer
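       // CPUID leaves 0x80000002..0x80000004 each return 16 bytes of the brand
       // string, giving the 12 dwords (48 bytes) stored below.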
2660   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2661   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2662   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2663   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2664   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2665   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2666   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2667   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2668   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2669   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2670   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2671   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2672 
2673   return OS_OK;
2674 }
2675 
2676 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2677   guarantee(buf != nullptr, "buffer is null!");
2678   guarantee(buf_len > 0, "buffer len must be positive!");
2679 
2680   unsigned int flag = 0;
2681   unsigned int fi = 0;
2682   size_t       written = 0;
2683   const char*  prefix = "";
2684 
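     // Appends 'string' to buf, inserting the ", " separator before every entry after the first.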
2685 #define WRITE_TO_BUF(string)                                                          \
2686   {                                                                                   \
2687     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2688     if (res < 0) {                                                                    \
2689       return buf_len - 1;                                                             \
2690     }                                                                                 \
2691     written += res;                                                                   \
2692     if (prefix[0] == '\0') {                                                          \
2693       prefix = ", ";                                                                  \
2694     }                                                                                 \
2695   }
2696 
2697   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2698     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2699       continue; /* no hyperthreading */
2700     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2701       continue; /* no fast system call */
2702     }
2703     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2704       WRITE_TO_BUF(_feature_edx_id[fi]);
2705     }
2706   }
2707 
2708   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2709     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2710       WRITE_TO_BUF(_feature_ecx_id[fi]);
2711     }
2712   }
2713 
2714   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2715     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2716       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2717     }
2718   }
2719 
2720   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2721     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2722       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2723     }
2724   }
2725 
2726   if (supports_tscinv_bit()) {
2727       WRITE_TO_BUF("Invariant TSC");
2728   }
2729 
2730   return written;
2731 }
2732 
2733 /**
2734  * Write a detailed description of the cpu to a given buffer, including
2735  * feature set.
2736  */
2737 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2738   assert(buf != nullptr, "buffer is null!");
2739   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should be at least CPU_DETAILED_DESC_BUF_SIZE!");
2740 
2741   static const char* unknown = "<unknown>";
2742   char               vendor_id[VENDOR_LENGTH];
2743   const char*        family = nullptr;
2744   const char*        model = nullptr;
2745   const char*        brand = nullptr;
2746   int                outputLen = 0;
2747 
2748   family = cpu_family_description();
2749   if (family == nullptr) {
2750     family = unknown;
2751   }
2752 
2753   model = cpu_model_description();
2754   if (model == nullptr) {
2755     model = unknown;
2756   }
2757 
2758   brand = cpu_brand_string();
2759 
2760   if (brand == nullptr) {
2761     brand = cpu_brand();
2762     if (brand == nullptr) {
2763       brand = unknown;
2764     }
2765   }
2766 
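       // CPUID leaf 0 reports the vendor string in EBX, EDX, ECX order
       // (e.g. "Genu", "ineI", "ntel"), while the fields are stored in
       // EBX, ECX, EDX order, hence the 0, 2, 1 sequence below.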
2767   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2768   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2769   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2770   vendor_id[VENDOR_LENGTH-1] = '\0';
2771 
2772   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2773     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2774     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2775     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2776     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2777     "Supports: ",
2778     brand,
2779     vendor_id,
2780     family,
2781     extended_cpu_family(),
2782     model,
2783     extended_cpu_model(),
2784     cpu_stepping(),
2785     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2786     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2787     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2788     _cpuid_info.std_cpuid1_eax.value,
2789     _cpuid_info.std_cpuid1_ebx.value,
2790     _cpuid_info.std_cpuid1_ecx.value,
2791     _cpuid_info.std_cpuid1_edx.value,
2792     _cpuid_info.ext_cpuid1_eax,
2793     _cpuid_info.ext_cpuid1_ebx,
2794     _cpuid_info.ext_cpuid1_ecx,
2795     _cpuid_info.ext_cpuid1_edx);
2796 
2797   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2798     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2799     return OS_ERR;
2800   }
2801 
2802   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2803 
2804   return OS_OK;
2805 }
2806 
2807 
2808 // Fill in Abstract_VM_Version statics
2809 void VM_Version::initialize_cpu_information() {
2810   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2811   assert(!_initialized, "shouldn't be initialized yet");
2812   resolve_cpu_information_details();
2813 
2814   // initialize cpu_name and cpu_desc
2815   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2816   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2817   _initialized = true;
2818 }
2819 
2820 /**
2821  *  For information about extracting the frequency from the cpu brand string, please see:
2822  *
2823  *    Intel Processor Identification and the CPUID Instruction
2824  *    Application Note 485
2825  *    May 2012
2826  *
2827  * The return value is the frequency in Hz.
2828  */
2829 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2830   const char* const brand_string = cpu_brand_string();
2831   if (brand_string == nullptr) {
2832     return 0;
2833   }
2834   const int64_t MEGA = 1000000;
2835   int64_t multiplier = 0;
2836   int64_t frequency = 0;
2837   uint8_t idx = 0;
2838   // The brand string buffer is at most 48 bytes.
2839   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
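       // Examples: a brand string ending in "3.20GHz" parses as
       // 3*10^9 + 2*10^8 + 0*10^7 = 3.2 GHz; one ending in "2400MHz"
       // parses as 2400 * 10^6 Hz.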
2840   for (; idx < 48-2; ++idx) {
2841     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2842     // Search brand string for "yHz" where y is M, G, or T.
2843     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2844       if (brand_string[idx] == 'M') {
2845         multiplier = MEGA;
2846       } else if (brand_string[idx] == 'G') {
2847         multiplier = MEGA * 1000;
2848       } else if (brand_string[idx] == 'T') {
2849         multiplier = MEGA * MEGA;
2850       }
2851       break;
2852     }
2853   }
2854   if (multiplier > 0) {
2855     // Compute frequency (in Hz) from brand string.
2856     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2857       frequency =  (brand_string[idx-4] - '0') * multiplier;
2858       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2859       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2860     } else { // format is "xxxx"
2861       frequency =  (brand_string[idx-4] - '0') * 1000;
2862       frequency += (brand_string[idx-3] - '0') * 100;
2863       frequency += (brand_string[idx-2] - '0') * 10;
2864       frequency += (brand_string[idx-1] - '0');
2865       frequency *= multiplier;
2866     }
2867   }
2868   return frequency;
2869 }
2870 
2871 
2872 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2873   if (_max_qualified_cpu_frequency == 0) {
2874     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2875   }
2876   return _max_qualified_cpu_frequency;
2877 }
2878 
2879 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2880   VM_Features vm_features;
2881   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2882     vm_features.set_feature(CPU_CX8);
2883   if (std_cpuid1_edx.bits.cmov != 0)
2884     vm_features.set_feature(CPU_CMOV);
2885   if (std_cpuid1_edx.bits.clflush != 0)
2886     vm_features.set_feature(CPU_FLUSH);
2887   // clflush should always be available on x86_64;
2888   // if not, we are in real trouble because we rely on it
2889   // to flush the code cache.
2890   assert(vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2891   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2892       ext_cpuid1_edx.bits.fxsr != 0))
2893     vm_features.set_feature(CPU_FXSR);
2894   // HT flag is set for multi-core processors also.
2895   if (threads_per_core() > 1)
2896     vm_features.set_feature(CPU_HT);
2897   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2898       ext_cpuid1_edx.bits.mmx != 0))
2899     vm_features.set_feature(CPU_MMX);
2900   if (std_cpuid1_edx.bits.sse != 0)
2901     vm_features.set_feature(CPU_SSE);
2902   if (std_cpuid1_edx.bits.sse2 != 0)
2903     vm_features.set_feature(CPU_SSE2);
2904   if (std_cpuid1_ecx.bits.sse3 != 0)
2905     vm_features.set_feature(CPU_SSE3);
2906   if (std_cpuid1_ecx.bits.ssse3 != 0)
2907     vm_features.set_feature(CPU_SSSE3);
2908   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2909     vm_features.set_feature(CPU_SSE4_1);
2910   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2911     vm_features.set_feature(CPU_SSE4_2);
2912   if (std_cpuid1_ecx.bits.popcnt != 0)
2913     vm_features.set_feature(CPU_POPCNT);
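       // APX is usable only if both the CPUID feature bit and the XCR0 bit
       // (OS-enabled extended GPR state) are set.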
2914   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2915       xem_xcr0_eax.bits.apx_f != 0) {
2916     vm_features.set_feature(CPU_APX_F);
2917   }
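       // AVX is usable only with OSXSAVE set and XCR0 showing that the OS
       // manages both SSE (XMM) and YMM state.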
2918   if (std_cpuid1_ecx.bits.avx != 0 &&
2919       std_cpuid1_ecx.bits.osxsave != 0 &&
2920       xem_xcr0_eax.bits.sse != 0 &&
2921       xem_xcr0_eax.bits.ymm != 0) {
2922     vm_features.set_feature(CPU_AVX);
2923     vm_features.set_feature(CPU_VZEROUPPER);
2924     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2925       vm_features.set_feature(CPU_SHA512);
2926     if (std_cpuid1_ecx.bits.f16c != 0)
2927       vm_features.set_feature(CPU_F16C);
2928     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2929       vm_features.set_feature(CPU_AVX2);
2930       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2931         vm_features.set_feature(CPU_AVX_IFMA);
2932     }
2933     if (sef_cpuid7_ecx.bits.gfni != 0)
2934       vm_features.set_feature(CPU_GFNI);
2935     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2936         xem_xcr0_eax.bits.opmask != 0 &&
2937         xem_xcr0_eax.bits.zmm512 != 0 &&
2938         xem_xcr0_eax.bits.zmm32 != 0) {
2939       vm_features.set_feature(CPU_AVX512F);
2940       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2941         vm_features.set_feature(CPU_AVX512CD);
2942       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2943         vm_features.set_feature(CPU_AVX512DQ);
2944       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2945         vm_features.set_feature(CPU_AVX512_IFMA);
2946       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2947         vm_features.set_feature(CPU_AVX512PF);
2948       if (sef_cpuid7_ebx.bits.avx512er != 0)
2949         vm_features.set_feature(CPU_AVX512ER);
2950       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2951         vm_features.set_feature(CPU_AVX512BW);
2952       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2953         vm_features.set_feature(CPU_AVX512VL);
2954       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2955         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2956       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2957         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2958       if (sef_cpuid7_ecx.bits.vaes != 0)
2959         vm_features.set_feature(CPU_AVX512_VAES);
2960       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2961         vm_features.set_feature(CPU_AVX512_VNNI);
2962       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2963         vm_features.set_feature(CPU_AVX512_BITALG);
2964       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2965         vm_features.set_feature(CPU_AVX512_VBMI);
2966       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2967         vm_features.set_feature(CPU_AVX512_VBMI2);
2968     }
2969     if (is_intel()) {
2970       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2971           std_cpuid24_ebx.bits.avx10_vlen_512 != 0 &&
2972           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2973           xem_xcr0_eax.bits.opmask != 0 &&
2974           xem_xcr0_eax.bits.zmm512 != 0 &&
2975           xem_xcr0_eax.bits.zmm32 != 0) {
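             // AVX10.1 with 512-bit vector length support implies the full
             // AVX-512 feature set enabled below.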
2976         vm_features.set_feature(CPU_AVX10_1);
2977         vm_features.set_feature(CPU_AVX512F);
2978         vm_features.set_feature(CPU_AVX512CD);
2979         vm_features.set_feature(CPU_AVX512DQ);
2980         vm_features.set_feature(CPU_AVX512PF);
2981         vm_features.set_feature(CPU_AVX512ER);
2982         vm_features.set_feature(CPU_AVX512BW);
2983         vm_features.set_feature(CPU_AVX512VL);
2984         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2985         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2986         vm_features.set_feature(CPU_AVX512_VAES);
2987         vm_features.set_feature(CPU_AVX512_VNNI);
2988         vm_features.set_feature(CPU_AVX512_BITALG);
2989         vm_features.set_feature(CPU_AVX512_VBMI);
2990         vm_features.set_feature(CPU_AVX512_VBMI2);
2991         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2992           vm_features.set_feature(CPU_AVX10_2);
2993         }
2994       }
2995     }
2996   }
2997 
2998   if (std_cpuid1_ecx.bits.hv != 0)
2999     vm_features.set_feature(CPU_HV);
3000   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3001     vm_features.set_feature(CPU_BMI1);
3002   if (std_cpuid1_edx.bits.tsc != 0)
3003     vm_features.set_feature(CPU_TSC);
3004   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3005     vm_features.set_feature(CPU_TSCINV_BIT);
3006   if (std_cpuid1_ecx.bits.aes != 0)
3007     vm_features.set_feature(CPU_AES);
3008   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3009     vm_features.set_feature(CPU_LZCNT);
3010   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3011     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3012   if (sef_cpuid7_ebx.bits.erms != 0)
3013     vm_features.set_feature(CPU_ERMS);
3014   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3015     vm_features.set_feature(CPU_FSRM);
3016   if (std_cpuid1_ecx.bits.clmul != 0)
3017     vm_features.set_feature(CPU_CLMUL);
3018   if (sef_cpuid7_ebx.bits.rtm != 0)
3019     vm_features.set_feature(CPU_RTM);
3020   if (sef_cpuid7_ebx.bits.adx != 0)
3021     vm_features.set_feature(CPU_ADX);
3022   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3023     vm_features.set_feature(CPU_BMI2);
3024   if (sef_cpuid7_ebx.bits.sha != 0)
3025     vm_features.set_feature(CPU_SHA);
3026   if (std_cpuid1_ecx.bits.fma != 0)
3027     vm_features.set_feature(CPU_FMA);
3028   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3029     vm_features.set_feature(CPU_FLUSHOPT);
3030   if (sef_cpuid7_ebx.bits.clwb != 0)
3031     vm_features.set_feature(CPU_CLWB);
3032   if (ext_cpuid1_edx.bits.rdtscp != 0)
3033     vm_features.set_feature(CPU_RDTSCP);
3034   if (sef_cpuid7_ecx.bits.rdpid != 0)
3035     vm_features.set_feature(CPU_RDPID);
3036 
3037   // AMD|Hygon additional features.
3038   if (is_amd_family()) {
3039     // PREFETCHW was checked above; check 3DNow! (tdnow) here.
3040     if (ext_cpuid1_edx.bits.tdnow != 0)
3041       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3042     if (ext_cpuid1_ecx.bits.sse4a != 0)
3043       vm_features.set_feature(CPU_SSE4A);
3044   }
3045 
3046   // Intel additional features.
3047   if (is_intel()) {
3048     if (sef_cpuid7_edx.bits.serialize != 0)
3049       vm_features.set_feature(CPU_SERIALIZE);
3050     if (sef_cpuid7_edx.bits.hybrid != 0)
3051       vm_features.set_feature(CPU_HYBRID);
3052     if (sef_cpuid7_edx.bits.avx512_fp16 != 0)
3053       vm_features.set_feature(CPU_AVX512_FP16);
3054   }
3055 
3056   // ZX additional features.
3057   if (is_zx()) {
3058     // We do not know whether ZX actually implements CLWB, so we cannot
3059     // trust the common CPUID bit for it.
3060     assert(vm_features.supports_feature(CPU_CLWB), "check whether CLWB is supported");
3061     vm_features.clear_feature(CPU_CLWB);
3062   }
3063 
3064   // Protection key features.
3065   if (sef_cpuid7_ecx.bits.pku != 0) {
3066     vm_features.set_feature(CPU_PKU);
3067   }
3068   if (sef_cpuid7_ecx.bits.ospke != 0) {
3069     vm_features.set_feature(CPU_OSPKE);
3070   }
3071 
3072   // Control flow enforcement (CET) features.
3073   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3074     vm_features.set_feature(CPU_CET_SS);
3075   }
3076   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3077     vm_features.set_feature(CPU_CET_IBT);
3078   }
3079 
3080   // Composite features.
3081   if (supports_tscinv_bit() &&
3082       ((is_amd_family() && !is_amd_Barcelona()) ||
3083        is_intel_tsc_synched_at_init())) {
3084     vm_features.set_feature(CPU_TSCINV);
3085   }
3086   return vm_features;
3087 }
3088 
3089 bool VM_Version::os_supports_avx_vectors() {
3090   bool retVal = false;
3091   int nreg = 4;
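       // The detection stub stores the vector registers as 32-bit words:
       // 8 words per ymm register and 16 words per zmm register.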
3092   if (supports_evex()) {
3093     // Verify that the OS saves and restores all bits of the EVEX registers
3094     // during signal processing.
3095     retVal = true;
3096     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3097       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3098         retVal = false;
3099         break;
3100       }
3101     }
3102   } else if (supports_avx()) {
3103     // Verify that the OS saves and restores all bits of the AVX registers
3104     // during signal processing.
3105     retVal = true;
3106     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3107       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3108         retVal = false;
3109         break;
3110       }
3111     }
3112     // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen.
3113     if (retVal == false) {
3114       // Verify that the OS saves and restores all bits of the EVEX registers
3115       // during signal processing.
3116       retVal = true;
3117       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3118         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3119           retVal = false;
3120           break;
3121         }
3122       }
3123     }
3124   }
3125   return retVal;
3126 }
3127 
3128 bool VM_Version::os_supports_apx_egprs() {
3129   if (!supports_apx_f()) {
3130     return false;
3131   }
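       // apx_save is filled by the detection stub (presumably via the same
       // signal-based test used for the AVX registers above): two extended
       // GPRs loaded with egpr_test_value() must survive intact.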
3132   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3133       _cpuid_info.apx_save[1] != egpr_test_value()) {
3134     return false;
3135   }
3136   return true;
3137 }
3138 
3139 uint VM_Version::cores_per_cpu() {
3140   uint result = 1;
3141   if (is_intel()) {
3142     bool supports_topology = supports_processor_topology();
3143     if (supports_topology) {
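           // CPUID leaf 0xB: level 1 (core) EBX reports logical CPUs per
           // package, level 0 (SMT) EBX reports logical CPUs per core, so the
           // quotient is the core count; e.g. 16 / 2 = 8 cores.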
3144       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3145                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3146     }
3147     if (!supports_topology || result == 0) {
3148       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3149     }
3150   } else if (is_amd_family()) {
3151     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3152     if (cpu_family() >= 0x17) { // Zen or later
3153       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3154     }
3155   } else if (is_zx()) {
3156     bool supports_topology = supports_processor_topology();
3157     if (supports_topology) {
3158       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3159                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3160     }
3161     if (!supports_topology || result == 0) {
3162       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3163     }
3164   }
3165   return result;
3166 }
3167 
3168 uint VM_Version::threads_per_core() {
3169   uint result = 1;
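       // With x2APIC topology (CPUID leaf 0xB), level 0 (SMT) EBX is the
       // number of logical processors per core.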
3170   if (is_intel() && supports_processor_topology()) {
3171     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3172   } else if (is_zx() && supports_processor_topology()) {
3173     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3174   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3175     if (cpu_family() >= 0x17) {
3176       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3177     } else {
3178       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3179                  cores_per_cpu();
3180     }
3181   }
3182   return (result == 0 ? 1 : result);
3183 }
3184 
3185 uint VM_Version::L1_line_size() {
3186   uint result = 0;
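       // CPUID leaf 4 (Intel and ZX) encodes the coherency line size minus
       // one, hence the +1 below; AMD leaf 0x80000005 reports the line size
       // in bytes directly.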
3187   if (is_intel()) {
3188     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3189   } else if (is_amd_family()) {
3190     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3191   } else if (is_zx()) {
3192     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3193   }
3194   if (result < 32) // not reported by CPUID?
3195     result = 32;   // fall back to 32 bytes, a conservative x86 default
3196   return result;
3197 }
3198 
3199 bool VM_Version::is_intel_tsc_synched_at_init() {
3200   if (is_intel_family_core()) {
3201     uint32_t ext_model = extended_cpu_model();
3202     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3203         ext_model == CPU_MODEL_WESTMERE_EP    ||
3204         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3205         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3206       // These EP parts have invariant TSC support in <= 2-socket systems.
3207       // EX versions are usually used in > 2-socket systems and likely
3208       // don't synchronize TSCs at initialization.
3209       // Code that uses TSC values must be prepared for them to arbitrarily
3210       // jump forward or backward.
3211       return true;
3212     }
3213   }
3214   return false;
3215 }
3216 
3217 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3218   // Hardware prefetching (distance/size in bytes):
3219   // Pentium 3 -  64 /  32
3220   // Pentium 4 - 256 / 128
3221   // Athlon    -  64 /  32 (unverified)
3222   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3223   // Core      - 128 /  64
3224   //
3225   // Software prefetching (distance in bytes / instruction with best score):
3226   // Pentium 3 - 128 / prefetchnta
3227   // Pentium 4 - 512 / prefetchnta
3228   // Athlon    - 128 / prefetchnta
3229   // Opteron   - 256 / prefetchnta
3230   // Core      - 256 / prefetchnta
3231   // The returned distance is used only when AllocatePrefetchStyle > 0.
3232 
3233   if (is_amd_family()) { // AMD | Hygon
3234     if (supports_sse2()) {
3235       return 256; // Opteron
3236     } else {
3237       return 128; // Athlon
3238     }
3239   } else { // Intel
3240     if (supports_sse3() && is_intel_server_family()) {
3241       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3242         return 192;
3243       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3244         return 384;
3245       }
3246     }
3247     if (supports_sse2()) {
3248       if (is_intel_server_family()) {
3249         return 256; // Pentium M, Core, Core2
3250       } else {
3251         return 512; // Pentium 4
3252       }
3253     } else {
3254       return 128; // Pentium 3 (and all other old CPUs)
3255     }
3256   }
3257 }
3258 
3259 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3260   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3261   switch (id) {
3262   case vmIntrinsics::_floatToFloat16:
3263   case vmIntrinsics::_float16ToFloat:
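         // These intrinsics need hardware float16 conversion support
         // (presumably F16C and/or AVX512-FP16, as checked by supports_float16()).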
3264     if (!supports_float16()) {
3265       return false;
3266     }
3267     break;
3268   default:
3269     break;
3270   }
3271   return true;
3272 }
3273 
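     // Appends the names of all features set in 'features' to 'ss',
     // separated by ", " (e.g. "cx8, cmov, fxsr, ...").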
3274 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3275   int i = 0;
3276   ss.join([&]() {
3277     while (i < MAX_CPU_FEATURES) {
3278       if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3279         return _features_names[i++];
3280       }
3281       i += 1;
3282     }
3283     return (const char*)nullptr;
3284   }, ", ");
3285 }