1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
// Raw processor identity, filled in by get_processor_features().
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into a parallel array of printable feature names.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code blob holding the CPU-detection stubs generated below.
static BufferBlob* stub_blob;
static const int stub_size = 2000;

// Size of the features bitmap in 64-bit words.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// C signatures of the generated stubs; they are invoked directly from C++.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  80 
  81 bool VM_Version::supports_clflush() {
  82   // clflush should always be available on x86_64
  83   // if not we are in real trouble because we rely on it
  84   // to flush the code cache.
  85   // Unfortunately, Assembler::clflush is currently called as part
  86   // of generation of the code cache flush routine. This happens
  87   // under Universe::init before the processor features are set
  88   // up. Assembler::flush calls this routine to check that clflush
  89   // is allowed. So, we give the caller a free pass if Universe init
  90   // is still in progress.
  91   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  92   return true;
  93 }
  94 
// CPUID leaf (function) numbers used by the detection stubs below.
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
 107 
// Generates stand-alone assembly stubs, executed very early during startup,
// that probe the processor: raw CPUID leaves, OS-enabled extended state via
// XGETBV/XCR0, signal-based verification that YMM/ZMM (and APX extended GPR)
// register state survives signal handling, and a plain CPUID stub used for
// virtualization detection.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Returns a stub that zeroes the extended GPRs r16 and r31. It is used by
  // the APX SEGV test below: clearing the registers inside signal handling
  // ensures that if the test values reappear afterwards, it is because the
  // OS restored the EGPR state, not because the registers were untouched.
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    // Temporarily pretend APX is available so the assembler accepts
    // instructions that reference r16/r31; restored before returning.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Returns the main CPU-detection stub. It fills in the CpuidInfo struct
  // passed as its single argument with the raw output of the CPUID leaves
  // queried below, plus XCR0 and the results of the signal-based
  // register save/restore checks.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    // Emit the EVEX probe paths unless the user pinned UseAVX to 2 or lower.
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);    // rcx keeps the original EFLAGS for comparison
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    // Store the cpuid(0x0) result: max standard leaf and vendor string.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    // NOTE(review): leaf 0x24 is queried without checking the max supported
    // standard leaf — presumably consumers validate via the avx10 feature
    // bit before trusting this data; confirm against feature_flags().
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    // Note: the extended leaves below are filled in highest-first; each
    // section falls through into the next lower leaf's section.
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX save/restore check: load test values into the EGPRs, trigger a
    // SEGV (null load), and after the signal handler returns store r16/r31
    // so the caller can verify the OS restored them.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // proceed to SIMD checks if OS saves SSE/YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    // xmm5-xmm15 are not preserved by caller on windows
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      // Store the post-signal ZMM contents so the caller can verify the
      // upper bits survived the signal round-trip.
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore xmm registers saved above (reverse order of the pushes).
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    // Store the post-signal YMM contents for later verification.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore xmm registers saved above (reverse order of the pushes).
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits a vzeroupper unless the CPU is a Xeon Phi (genuine Intel,
  // models 0x57/0x85), where it is skipped; jumps to L_wrapup when skipping.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Returns a stub that executes CPUID for the leaf passed in the first
  // argument and stores eax/ebx/ecx/edx into the uint32_t[4] array passed
  // in the second argument. Used for virtualization detection.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Returns a stub that fills in the 48-byte processor brand string
  // (CPUID leaves 0x80000002..0x80000004) in the CpuidInfo struct passed
  // as its single argument. Mirrors the 386/486 detection preamble of
  // generate_get_cpu_info().
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 848 
 849 void VM_Version::get_processor_features() {
 850 
 851   _cpu = 4; // 486 by default
 852   _model = 0;
 853   _stepping = 0;
 854   _logical_processors_per_package = 1;
 855   // i486 internal cache is both I&D and has a 16-byte line size
 856   _L1_data_cache_line_size = 16;
 857 
 858   // Get raw processor info
 859 
 860   get_cpu_info_stub(&_cpuid_info);
 861 
 862   assert_is_initialized();
 863   _cpu = extended_cpu_family();
 864   _model = extended_cpu_model();
 865   _stepping = cpu_stepping();
 866 
 867   if (cpu_family() > 4) { // it supports CPUID
 868     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 869     _cpu_features = _features; // Preserve features
 870     // Logical processors are only available on P4s and above,
 871     // and only if hyperthreading is available.
 872     _logical_processors_per_package = logical_processor_count();
 873     _L1_data_cache_line_size = L1_line_size();
 874   }
 875 
 876   // xchg and xadd instructions
 877   _supports_atomic_getset4 = true;
 878   _supports_atomic_getadd4 = true;
 879   _supports_atomic_getset8 = true;
 880   _supports_atomic_getadd8 = true;
 881 
 882   // OS should support SSE for x64 and hardware should support at least SSE2.
 883   if (!VM_Version::supports_sse2()) {
 884     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 885   }
  // In 64-bit mode, SSE2 support is the required minimum.
 887   if (UseSSE < 2) UseSSE = 2;
 888 
 889   // flush_icache_stub have to be generated first.
 890   // That is why Icache line size is hard coded in ICache class,
 891   // see icache_x86.hpp. It is also the reason why we can't use
 892   // clflush instruction in 32-bit VM since it could be running
 893   // on CPU which does not support it.
 894   //
 895   // The only thing we can do is to verify that flushed
 896   // ICache::line_size has correct value.
 897   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 898   // clflush_size is size in quadwords (8 bytes).
 899   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 900 
 901   // assigning this field effectively enables Unsafe.writebackMemory()
 902   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 903   // that is only implemented on x86_64 and only if the OS plays ball
 904   if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero, thereby disabling writeback
 907     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 908   }
 909 
 910   // Check if processor has Intel Ecore
 911   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 912     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 913       _model == 0xCC || _model == 0xDD)) {
 914     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 915   }
 916 
 917   if (UseSSE < 4) {
 918     _features.clear_feature(CPU_SSE4_1);
 919     _features.clear_feature(CPU_SSE4_2);
 920   }
 921 
 922   if (UseSSE < 3) {
 923     _features.clear_feature(CPU_SSE3);
 924     _features.clear_feature(CPU_SSSE3);
 925     _features.clear_feature(CPU_SSE4A);
 926   }
 927 
 928   if (UseSSE < 2)
 929     _features.clear_feature(CPU_SSE2);
 930 
 931   if (UseSSE < 1)
 932     _features.clear_feature(CPU_SSE);
 933 
  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
 935   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 936     UseAVX = 0;
 937   }
 938 
 939   // UseSSE is set to the smaller of what hardware supports and what
 940   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 941   // older Pentiums which do not support it.
 942   int use_sse_limit = 0;
 943   if (UseSSE > 0) {
 944     if (UseSSE > 3 && supports_sse4_1()) {
 945       use_sse_limit = 4;
 946     } else if (UseSSE > 2 && supports_sse3()) {
 947       use_sse_limit = 3;
 948     } else if (UseSSE > 1 && supports_sse2()) {
 949       use_sse_limit = 2;
 950     } else if (UseSSE > 0 && supports_sse()) {
 951       use_sse_limit = 1;
 952     } else {
 953       use_sse_limit = 0;
 954     }
 955   }
 956   if (FLAG_IS_DEFAULT(UseSSE)) {
 957     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 958   } else if (UseSSE > use_sse_limit) {
 959     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 960     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 961   }
 962 
 963   // first try initial setting and detect what we can support
 964   int use_avx_limit = 0;
 965   if (UseAVX > 0) {
 966     if (UseSSE < 4) {
 967       // Don't use AVX if SSE is unavailable or has been disabled.
 968       use_avx_limit = 0;
 969     } else if (UseAVX > 2 && supports_evex()) {
 970       use_avx_limit = 3;
 971     } else if (UseAVX > 1 && supports_avx2()) {
 972       use_avx_limit = 2;
 973     } else if (UseAVX > 0 && supports_avx()) {
 974       use_avx_limit = 1;
 975     } else {
 976       use_avx_limit = 0;
 977     }
 978   }
 979   if (FLAG_IS_DEFAULT(UseAVX)) {
 980     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 981     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 982       FLAG_SET_DEFAULT(UseAVX, 2);
 983     } else {
 984       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 985     }
 986   }
 987 
 988   if (UseAVX > use_avx_limit) {
 989     if (UseSSE < 4) {
 990       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 991     } else {
 992       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 993     }
 994     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 995   }
 996 
 997   if (UseAVX < 3) {
 998     _features.clear_feature(CPU_AVX512F);
 999     _features.clear_feature(CPU_AVX512DQ);
1000     _features.clear_feature(CPU_AVX512CD);
1001     _features.clear_feature(CPU_AVX512BW);
1002     _features.clear_feature(CPU_AVX512ER);
1003     _features.clear_feature(CPU_AVX512PF);
1004     _features.clear_feature(CPU_AVX512VL);
1005     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1006     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1007     _features.clear_feature(CPU_AVX512_VAES);
1008     _features.clear_feature(CPU_AVX512_VNNI);
1009     _features.clear_feature(CPU_AVX512_VBMI);
1010     _features.clear_feature(CPU_AVX512_VBMI2);
1011     _features.clear_feature(CPU_AVX512_BITALG);
1012     _features.clear_feature(CPU_AVX512_IFMA);
1013     _features.clear_feature(CPU_APX_F);
1014     _features.clear_feature(CPU_AVX512_FP16);
1015     _features.clear_feature(CPU_AVX10_1);
1016     _features.clear_feature(CPU_AVX10_2);
1017   }
1018 
1019   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1020   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1021   if (UseAPX && !apx_supported) {
1022     warning("UseAPX is not supported on this CPU, setting it to false");
1023     FLAG_SET_DEFAULT(UseAPX, false);
1024   }
1025 
1026   if (!UseAPX) {
1027     _features.clear_feature(CPU_APX_F);
1028   }
1029 
1030   if (UseAVX < 2) {
1031     _features.clear_feature(CPU_AVX2);
1032     _features.clear_feature(CPU_AVX_IFMA);
1033   }
1034 
1035   if (UseAVX < 1) {
1036     _features.clear_feature(CPU_AVX);
1037     _features.clear_feature(CPU_VZEROUPPER);
1038     _features.clear_feature(CPU_F16C);
1039     _features.clear_feature(CPU_SHA512);
1040   }
1041 
1042   if (logical_processors_per_package() == 1) {
1043     // HT processor could be installed on a system which doesn't support HT.
1044     _features.clear_feature(CPU_HT);
1045   }
1046 
1047   if (is_intel()) { // Intel cpus specific settings
1048     if (is_knights_family()) {
1049       _features.clear_feature(CPU_VZEROUPPER);
1050       _features.clear_feature(CPU_AVX512BW);
1051       _features.clear_feature(CPU_AVX512VL);
1052       _features.clear_feature(CPU_AVX512DQ);
1053       _features.clear_feature(CPU_AVX512_VNNI);
1054       _features.clear_feature(CPU_AVX512_VAES);
1055       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1056       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1057       _features.clear_feature(CPU_AVX512_VBMI);
1058       _features.clear_feature(CPU_AVX512_VBMI2);
1059       _features.clear_feature(CPU_CLWB);
1060       _features.clear_feature(CPU_FLUSHOPT);
1061       _features.clear_feature(CPU_GFNI);
1062       _features.clear_feature(CPU_AVX512_BITALG);
1063       _features.clear_feature(CPU_AVX512_IFMA);
1064       _features.clear_feature(CPU_AVX_IFMA);
1065       _features.clear_feature(CPU_AVX512_FP16);
1066       _features.clear_feature(CPU_AVX10_1);
1067       _features.clear_feature(CPU_AVX10_2);
1068     }
1069   }
1070 
1071   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1072     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1073     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1074   } else {
1075     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1076   }
1077 
1078   assert(supports_clflush(), "Always present");
1079   if (X86ICacheSync == -1) {
1080     // Auto-detect, choosing the best performant one that still flushes
1081     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1082     if (supports_clwb()) {
1083       FLAG_SET_ERGO(X86ICacheSync, 3);
1084     } else if (supports_clflushopt()) {
1085       FLAG_SET_ERGO(X86ICacheSync, 2);
1086     } else {
1087       FLAG_SET_ERGO(X86ICacheSync, 1);
1088     }
1089   } else {
1090     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1091       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1092     }
1093     if ((X86ICacheSync == 3) && !supports_clwb()) {
1094       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1095     }
1096     if ((X86ICacheSync == 5) && !supports_serialize()) {
1097       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1098     }
1099   }
1100 
1101   stringStream ss(2048);
1102   ss.print("(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1103            cores_per_cpu(), threads_per_core(),
1104            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1105   ss.print(", ");
1106   int features_offset = (int)ss.size();
1107   insert_features_names(_features, ss);
1108 
1109   _cpu_info_string = ss.as_string(true);
1110   _features_string = _cpu_info_string + features_offset;
1111 
1112   // Use AES instructions if available.
1113   if (supports_aes()) {
1114     if (FLAG_IS_DEFAULT(UseAES)) {
1115       FLAG_SET_DEFAULT(UseAES, true);
1116     }
1117     if (!UseAES) {
1118       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1119         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1120       }
1121       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1122     } else {
1123       if (UseSSE > 2) {
1124         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1125           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1126         }
1127       } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
1130         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1131           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1132         }
1133         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1134       }
1135 
1136       // --AES-CTR begins--
1137       if (!UseAESIntrinsics) {
1138         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1139           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1140           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1141         }
1142       } else {
1143         if (supports_sse4_1()) {
1144           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1145             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1146           }
1147         } else {
           // The AES-CTR intrinsic stubs require AES instruction support (of course)
           // but also require SSE4.1 mode or higher for the instructions they use.
1150           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1151              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1152            }
1153            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1154         }
1155       }
1156       // --AES-CTR ends--
1157     }
1158   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1159     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1160       warning("AES instructions are not available on this CPU");
1161       FLAG_SET_DEFAULT(UseAES, false);
1162     }
1163     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1164       warning("AES intrinsics are not available on this CPU");
1165       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1166     }
1167     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1168       warning("AES-CTR intrinsics are not available on this CPU");
1169       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1170     }
1171   }
1172 
1173   // Use CLMUL instructions if available.
1174   if (supports_clmul()) {
1175     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1176       UseCLMUL = true;
1177     }
1178   } else if (UseCLMUL) {
1179     if (!FLAG_IS_DEFAULT(UseCLMUL))
1180       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1181     FLAG_SET_DEFAULT(UseCLMUL, false);
1182   }
1183 
1184   if (UseCLMUL && (UseSSE > 2)) {
1185     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1186       UseCRC32Intrinsics = true;
1187     }
1188   } else if (UseCRC32Intrinsics) {
1189     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1190       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1191     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1192   }
1193 
1194   if (supports_avx2()) {
1195     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1196       UseAdler32Intrinsics = true;
1197     }
1198   } else if (UseAdler32Intrinsics) {
1199     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1200       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1201     }
1202     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1203   }
1204 
1205   if (supports_sse4_2() && supports_clmul()) {
1206     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1207       UseCRC32CIntrinsics = true;
1208     }
1209   } else if (UseCRC32CIntrinsics) {
1210     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1211       warning("CRC32C intrinsics are not available on this CPU");
1212     }
1213     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1214   }
1215 
1216   // GHASH/GCM intrinsics
1217   if (UseCLMUL && (UseSSE > 2)) {
1218     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1219       UseGHASHIntrinsics = true;
1220     }
1221   } else if (UseGHASHIntrinsics) {
1222     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1223       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1224     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1225   }
1226 
1227   // ChaCha20 Intrinsics
1228   // As long as the system supports AVX as a baseline we can do a
1229   // SIMD-enabled block function.  StubGenerator makes the determination
1230   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1231   // version.
1232   if (UseAVX >= 1) {
1233       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1234           UseChaCha20Intrinsics = true;
1235       }
1236   } else if (UseChaCha20Intrinsics) {
1237       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1238           warning("ChaCha20 intrinsic requires AVX instructions");
1239       }
1240       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1241   }
1242 
1243   // Kyber Intrinsics
1244   // Currently we only have them for AVX512
1245 #ifdef _LP64
1246   if (supports_evex() && supports_avx512bw()) {
1247       if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1248           UseKyberIntrinsics = true;
1249       }
1250   } else
1251 #endif
1252   if (UseKyberIntrinsics) {
1253      warning("Intrinsics for ML-KEM are not available on this CPU.");
1254      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1255   }
1256 
1257   // Dilithium Intrinsics
1258   // Currently we only have them for AVX512
1259   if (supports_evex() && supports_avx512bw()) {
1260       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1261           UseDilithiumIntrinsics = true;
1262       }
1263   } else if (UseDilithiumIntrinsics) {
1264       warning("Intrinsics for ML-DSA are not available on this CPU.");
1265       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1266   }
1267 
1268   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1269   if (UseAVX >= 2) {
1270     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1271       UseBASE64Intrinsics = true;
1272     }
1273   } else if (UseBASE64Intrinsics) {
1274      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1275       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1276     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1277   }
1278 
1279   if (supports_fma()) {
1280     if (FLAG_IS_DEFAULT(UseFMA)) {
1281       UseFMA = true;
1282     }
1283   } else if (UseFMA) {
1284     warning("FMA instructions are not available on this CPU");
1285     FLAG_SET_DEFAULT(UseFMA, false);
1286   }
1287 
1288   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1289     UseMD5Intrinsics = true;
1290   }
1291 
1292   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1293     if (FLAG_IS_DEFAULT(UseSHA)) {
1294       UseSHA = true;
1295     }
1296   } else if (UseSHA) {
1297     warning("SHA instructions are not available on this CPU");
1298     FLAG_SET_DEFAULT(UseSHA, false);
1299   }
1300 
1301   if (supports_sha() && supports_sse4_1() && UseSHA) {
1302     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1303       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1304     }
1305   } else if (UseSHA1Intrinsics) {
1306     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1307     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1308   }
1309 
1310   if (supports_sse4_1() && UseSHA) {
1311     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1312       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1313     }
1314   } else if (UseSHA256Intrinsics) {
1315     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1316     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1317   }
1318 
1319   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1320     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1321       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1322     }
1323   } else if (UseSHA512Intrinsics) {
1324     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1325     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1326   }
1327 
1328   if (supports_evex() && supports_avx512bw()) {
1329       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1330           UseSHA3Intrinsics = true;
1331       }
1332   } else if (UseSHA3Intrinsics) {
1333       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1334       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1335   }
1336 
1337   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1338     FLAG_SET_DEFAULT(UseSHA, false);
1339   }
1340 
1341 #if COMPILER2_OR_JVMCI
1342   int max_vector_size = 0;
1343   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1344     // 16 byte vectors (in XMM) are supported with SSE2+
1345     max_vector_size = 16;
1346   } else if (UseAVX == 1 || UseAVX == 2) {
1347     // 32 bytes vectors (in YMM) are only supported with AVX+
1348     max_vector_size = 32;
1349   } else if (UseAVX > 2) {
1350     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1351     max_vector_size = 64;
1352   }
1353 
1354   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1355 
1356   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1357     if (MaxVectorSize < min_vector_size) {
1358       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1359       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1360     }
1361     if (MaxVectorSize > max_vector_size) {
1362       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1363       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1364     }
1365     if (!is_power_of_2(MaxVectorSize)) {
1366       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1367       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1368     }
1369   } else {
1370     // If default, use highest supported configuration
1371     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1372   }
1373 
1374 #if defined(COMPILER2) && defined(ASSERT)
1375   if (MaxVectorSize > 0) {
1376     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1377       tty->print_cr("State of YMM registers after signal handle:");
1378       int nreg = 4;
1379       const char* ymm_name[4] = {"0", "7", "8", "15"};
1380       for (int i = 0; i < nreg; i++) {
1381         tty->print("YMM%s:", ymm_name[i]);
1382         for (int j = 7; j >=0; j--) {
1383           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1384         }
1385         tty->cr();
1386       }
1387     }
1388   }
1389 #endif // COMPILER2 && ASSERT
1390 
1391   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1392     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1393       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1394     }
1395   } else if (UsePoly1305Intrinsics) {
1396     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1397     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1398   }
1399 
1400   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1401     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1402       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1403     }
1404   } else if (UseIntPolyIntrinsics) {
1405     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1406     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1407   }
1408 
1409   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1410     UseMultiplyToLenIntrinsic = true;
1411   }
1412   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1413     UseSquareToLenIntrinsic = true;
1414   }
1415   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1416     UseMulAddIntrinsic = true;
1417   }
1418   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1419     UseMontgomeryMultiplyIntrinsic = true;
1420   }
1421   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1422     UseMontgomerySquareIntrinsic = true;
1423   }
1424 #endif // COMPILER2_OR_JVMCI
1425 
1426   // On new cpus instructions which update whole XMM register should be used
1427   // to prevent partial register stall due to dependencies on high half.
1428   //
1429   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1430   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1431   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1432   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1433 
1434 
1435   if (is_zx()) { // ZX cpus specific settings
1436     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1437       UseStoreImmI16 = false; // don't use it on ZX cpus
1438     }
1439     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1440       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1441         // Use it on all ZX cpus
1442         UseAddressNop = true;
1443       }
1444     }
1445     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1446       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1447     }
1448     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1449       if (supports_sse3()) {
1450         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1451       } else {
1452         UseXmmRegToRegMoveAll = false;
1453       }
1454     }
1455     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1456 #ifdef COMPILER2
1457       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1458         // For new ZX cpus do the next optimization:
1459         // don't align the beginning of a loop if there are enough instructions
1460         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1461         // in current fetch line (OptoLoopAlignment) or the padding
1462         // is big (> MaxLoopPad).
1463         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1464         // generated NOP instructions. 11 is the largest size of one
1465         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1466         MaxLoopPad = 11;
1467       }
1468 #endif // COMPILER2
1469       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1470         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1471       }
1472       if (supports_sse4_2()) { // new ZX cpus
1473         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1474           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1475         }
1476       }
1477     }
1478 
1479     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1480       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1481     }
1482   }
1483 
1484   if (is_amd_family()) { // AMD cpus specific settings
1485     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1486       // Use it on new AMD cpus starting from Opteron.
1487       UseAddressNop = true;
1488     }
1489     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1490       // Use it on new AMD cpus starting from Opteron.
1491       UseNewLongLShift = true;
1492     }
1493     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1494       if (supports_sse4a()) {
1495         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1496       } else {
1497         UseXmmLoadAndClearUpper = false;
1498       }
1499     }
1500     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1501       if (supports_sse4a()) {
1502         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1503       } else {
1504         UseXmmRegToRegMoveAll = false;
1505       }
1506     }
1507     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1508       if (supports_sse4a()) {
1509         UseXmmI2F = true;
1510       } else {
1511         UseXmmI2F = false;
1512       }
1513     }
1514     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1515       if (supports_sse4a()) {
1516         UseXmmI2D = true;
1517       } else {
1518         UseXmmI2D = false;
1519       }
1520     }
1521 
1522     // some defaults for AMD family 15h
1523     if (cpu_family() == 0x15) {
1524       // On family 15h processors default is no sw prefetch
1525       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1526         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1527       }
1528       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1529       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1530         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1531       }
1532       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1533       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1534         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1535       }
1536       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1537         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1538       }
1539     }
1540 
1541 #ifdef COMPILER2
1542     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1543       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1544       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1545     }
1546 #endif // COMPILER2
1547 
1548     // Some defaults for AMD family >= 17h && Hygon family 18h
1549     if (cpu_family() >= 0x17) {
1550       // On family >=17h processors use XMM and UnalignedLoadStores
1551       // for Array Copy
1552       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1553         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1554       }
1555       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1556         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1557       }
1558 #ifdef COMPILER2
1559       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1560         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1561       }
1562 #endif
1563     }
1564   }
1565 
1566   if (is_intel()) { // Intel cpus specific settings
1567     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1568       UseStoreImmI16 = false; // don't use it on Intel cpus
1569     }
1570     if (is_intel_server_family() || cpu_family() == 15) {
1571       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1572         // Use it on all Intel cpus starting from PentiumPro
1573         UseAddressNop = true;
1574       }
1575     }
1576     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1577       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1578     }
1579     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1580       if (supports_sse3()) {
1581         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1582       } else {
1583         UseXmmRegToRegMoveAll = false;
1584       }
1585     }
1586     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1587 #ifdef COMPILER2
1588       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1589         // For new Intel cpus do the next optimization:
1590         // don't align the beginning of a loop if there are enough instructions
1591         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1592         // in current fetch line (OptoLoopAlignment) or the padding
1593         // is big (> MaxLoopPad).
1594         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1595         // generated NOP instructions. 11 is the largest size of one
1596         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1597         MaxLoopPad = 11;
1598       }
1599 #endif // COMPILER2
1600 
1601       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1602         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1603       }
1604       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1605         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1606           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1607         }
1608       }
1609     }
1610     if (is_atom_family() || is_knights_family()) {
1611 #ifdef COMPILER2
1612       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1613         OptoScheduling = true;
1614       }
1615 #endif
1616       if (supports_sse4_2()) { // Silvermont
1617         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1618           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1619         }
1620       }
1621       if (FLAG_IS_DEFAULT(UseIncDec)) {
1622         FLAG_SET_DEFAULT(UseIncDec, false);
1623       }
1624     }
1625     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1626       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1627     }
1628 #ifdef COMPILER2
1629     if (UseAVX > 2) {
1630       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1631           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1632            ArrayOperationPartialInlineSize != 0 &&
1633            ArrayOperationPartialInlineSize != 16 &&
1634            ArrayOperationPartialInlineSize != 32 &&
1635            ArrayOperationPartialInlineSize != 64)) {
1636         int inline_size = 0;
1637         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1638           inline_size = 64;
1639         } else if (MaxVectorSize >= 32) {
1640           inline_size = 32;
1641         } else if (MaxVectorSize >= 16) {
1642           inline_size = 16;
1643         }
1644         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1645           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1646         }
1647         ArrayOperationPartialInlineSize = inline_size;
1648       }
1649 
1650       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1651         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1652         if (ArrayOperationPartialInlineSize) {
1653           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1654         } else {
1655           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1656         }
1657       }
1658     }
1659 #endif
1660   }
1661 
1662 #ifdef COMPILER2
1663   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1664     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1665       OptimizeFill = false;
1666     }
1667   }
1668 #endif
1669   if (supports_sse4_2()) {
1670     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1671       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1672     }
1673   } else {
1674     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1675       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1676     }
1677     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1678   }
1679   if (UseSSE42Intrinsics) {
1680     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1681       UseVectorizedMismatchIntrinsic = true;
1682     }
1683   } else if (UseVectorizedMismatchIntrinsic) {
1684     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1685       warning("vectorizedMismatch intrinsics are not available on this CPU");
1686     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1687   }
1688   if (UseAVX >= 2) {
1689     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1690   } else if (UseVectorizedHashCodeIntrinsic) {
1691     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1692       warning("vectorizedHashCode intrinsics are not available on this CPU");
1693     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1694   }
1695 
1696   // Use count leading zeros count instruction if available.
1697   if (supports_lzcnt()) {
1698     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1699       UseCountLeadingZerosInstruction = true;
1700     }
1701    } else if (UseCountLeadingZerosInstruction) {
1702     warning("lzcnt instruction is not available on this CPU");
1703     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1704   }
1705 
1706   // Use count trailing zeros instruction if available
1707   if (supports_bmi1()) {
1708     // tzcnt does not require VEX prefix
1709     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1710       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1711         // Don't use tzcnt if BMI1 is switched off on command line.
1712         UseCountTrailingZerosInstruction = false;
1713       } else {
1714         UseCountTrailingZerosInstruction = true;
1715       }
1716     }
1717   } else if (UseCountTrailingZerosInstruction) {
1718     warning("tzcnt instruction is not available on this CPU");
1719     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1720   }
1721 
1722   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1723   // VEX prefix is generated only when AVX > 0.
1724   if (supports_bmi1() && supports_avx()) {
1725     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1726       UseBMI1Instructions = true;
1727     }
1728   } else if (UseBMI1Instructions) {
1729     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1730     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1731   }
1732 
1733   if (supports_bmi2() && supports_avx()) {
1734     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1735       UseBMI2Instructions = true;
1736     }
1737   } else if (UseBMI2Instructions) {
1738     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1739     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1740   }
1741 
1742   // Use population count instruction if available.
1743   if (supports_popcnt()) {
1744     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1745       UsePopCountInstruction = true;
1746     }
1747   } else if (UsePopCountInstruction) {
1748     warning("POPCNT instruction is not available on this CPU");
1749     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1750   }
1751 
1752   // Use fast-string operations if available.
1753   if (supports_erms()) {
1754     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1755       UseFastStosb = true;
1756     }
1757   } else if (UseFastStosb) {
1758     warning("fast-string operations are not available on this CPU");
1759     FLAG_SET_DEFAULT(UseFastStosb, false);
1760   }
1761 
1762   // For AMD Processors use XMM/YMM MOVDQU instructions
1763   // for Object Initialization as default
1764   if (is_amd() && cpu_family() >= 0x19) {
1765     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1766       UseFastStosb = false;
1767     }
1768   }
1769 
1770 #ifdef COMPILER2
1771   if (is_intel() && MaxVectorSize > 16) {
1772     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1773       UseFastStosb = false;
1774     }
1775   }
1776 #endif
1777 
1778   // Use XMM/YMM MOVDQU instruction for Object Initialization
1779   if (UseUnalignedLoadStores) {
1780     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1781       UseXMMForObjInit = true;
1782     }
1783   } else if (UseXMMForObjInit) {
1784     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1785     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1786   }
1787 
1788 #ifdef COMPILER2
1789   if (FLAG_IS_DEFAULT(AlignVector)) {
1790     // Modern processors allow misaligned memory operations for vectors.
1791     AlignVector = !UseUnalignedLoadStores;
1792   }
1793 #endif // COMPILER2
1794 
1795   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1796     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1797       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1798     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1799       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1800     }
1801   }
1802 
1803   // Allocation prefetch settings
1804   int cache_line_size = checked_cast<int>(prefetch_data_size());
1805   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1806       (cache_line_size > AllocatePrefetchStepSize)) {
1807     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1808   }
1809 
1810   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1811     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1812     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1813       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1814     }
1815     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1816   }
1817 
1818   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1819     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1820     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1821   }
1822 
1823   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1824     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1825         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1826       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1827     }
1828 #ifdef COMPILER2
1829     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1830       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1831     }
1832 #endif
1833   }
1834 
1835   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1836 #ifdef COMPILER2
1837     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1838       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1839     }
1840 #endif
1841   }
1842 
1843   // Prefetch settings
1844 
1845   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1846   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1847   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1848   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1849 
1850   // gc copy/scan is disabled if prefetchw isn't supported, because
1851   // Prefetch::write emits an inlined prefetchw on Linux.
1852   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1853   // The used prefetcht0 instruction works for both amd64 and em64t.
1854 
1855   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1856     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1857   }
1858   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1859     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1860   }
1861 
1862   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1863      (cache_line_size > ContendedPaddingWidth))
1864      ContendedPaddingWidth = cache_line_size;
1865 
1866   // This machine allows unaligned memory accesses
1867   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1868     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1869   }
1870 
1871 #ifndef PRODUCT
1872   if (log_is_enabled(Info, os, cpu)) {
1873     LogStream ls(Log(os, cpu)::info());
1874     outputStream* log = &ls;
1875     log->print_cr("Logical CPUs per core: %u",
1876                   logical_processors_per_package());
1877     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1878     log->print("UseSSE=%d", UseSSE);
1879     if (UseAVX > 0) {
1880       log->print("  UseAVX=%d", UseAVX);
1881     }
1882     if (UseAES) {
1883       log->print("  UseAES=1");
1884     }
1885 #ifdef COMPILER2
1886     if (MaxVectorSize > 0) {
1887       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1888     }
1889 #endif
1890     log->cr();
1891     log->print("Allocation");
1892     if (AllocatePrefetchStyle <= 0) {
1893       log->print_cr(": no prefetching");
1894     } else {
1895       log->print(" prefetching: ");
1896       if (AllocatePrefetchInstr == 0) {
1897         log->print("PREFETCHNTA");
1898       } else if (AllocatePrefetchInstr == 1) {
1899         log->print("PREFETCHT0");
1900       } else if (AllocatePrefetchInstr == 2) {
1901         log->print("PREFETCHT2");
1902       } else if (AllocatePrefetchInstr == 3) {
1903         log->print("PREFETCHW");
1904       }
1905       if (AllocatePrefetchLines > 1) {
1906         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1907       } else {
1908         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1909       }
1910     }
1911 
1912     if (PrefetchCopyIntervalInBytes > 0) {
1913       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1914     }
1915     if (PrefetchScanIntervalInBytes > 0) {
1916       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1917     }
1918     if (ContendedPaddingWidth > 0) {
1919       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1920     }
1921   }
1922 #endif // !PRODUCT
1923   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1924       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1925   }
1926   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1927       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1928   }
1929 }
1930 
1931 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1932   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1933   if (vrt == XenHVM) {
1934     st->print_cr("Xen hardware-assisted virtualization detected");
1935   } else if (vrt == KVM) {
1936     st->print_cr("KVM virtualization detected");
1937   } else if (vrt == VMWare) {
1938     st->print_cr("VMWare virtualization detected");
1939     VirtualizationSupport::print_virtualization_info(st);
1940   } else if (vrt == HyperV) {
1941     st->print_cr("Hyper-V virtualization detected");
1942   } else if (vrt == HyperVRole) {
1943     st->print_cr("Hyper-V role detected");
1944   }
1945 }
1946 
1947 bool VM_Version::compute_has_intel_jcc_erratum() {
1948   if (!is_intel_family_core()) {
1949     // Only Intel CPUs are affected.
1950     return false;
1951   }
1952   // The following table of affected CPUs is based on the following document released by Intel:
1953   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1954   switch (_model) {
1955   case 0x8E:
1956     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1957     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1958     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1959     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1960     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1961     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1962     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1963     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1964     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1965     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1966   case 0x4E:
1967     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1968     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1969     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1970     return _stepping == 0x3;
1971   case 0x55:
1972     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1973     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1974     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1975     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1976     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1977     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1978     return _stepping == 0x4 || _stepping == 0x7;
1979   case 0x5E:
1980     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1981     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1982     return _stepping == 0x3;
1983   case 0x9E:
1984     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1985     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1986     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1987     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1988     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1989     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1990     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1991     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1992     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1993     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1994     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1995     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1996     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1997     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1998     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1999   case 0xA5:
2000     // Not in Intel documentation.
2001     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2002     return true;
2003   case 0xA6:
2004     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2005     return _stepping == 0x0;
2006   case 0xAE:
2007     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2008     return _stepping == 0xA;
2009   default:
2010     // If we are running on another intel machine not recognized in the table, we are okay.
2011     return false;
2012   }
2013 }
2014 
2015 // On Xen, the cpuid instruction returns
2016 //  eax / registers[0]: Version of Xen
2017 //  ebx / registers[1]: chars 'XenV'
2018 //  ecx / registers[2]: chars 'MMXe'
2019 //  edx / registers[3]: chars 'nVMM'
2020 //
2021 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2022 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2023 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2024 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2025 //
// More information:
// https://kb.vmware.com/s/article/1009458
2028 //
2029 void VM_Version::check_virtualizations() {
2030   uint32_t registers[4] = {0};
2031   char signature[13] = {0};
2032 
2033   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2034   // from 0x40000000 until 0x40010000.
2035   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2036   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2037     detect_virt_stub(leaf, registers);
2038     memcpy(signature, &registers[1], 12);
2039 
2040     if (strncmp("VMwareVMware", signature, 12) == 0) {
2041       Abstract_VM_Version::_detected_virtualization = VMWare;
2042       // check for extended metrics from guestlib
2043       VirtualizationSupport::initialize();
2044     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2045       Abstract_VM_Version::_detected_virtualization = HyperV;
2046 #ifdef _WINDOWS
2047       // CPUID leaf 0x40000007 is available to the root partition only.
2048       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2049       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2050       detect_virt_stub(0x40000007, registers);
2051       if ((registers[0] != 0x0) ||
2052           (registers[1] != 0x0) ||
2053           (registers[2] != 0x0) ||
2054           (registers[3] != 0x0)) {
2055         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2056       }
2057 #endif
2058     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2059       Abstract_VM_Version::_detected_virtualization = KVM;
2060     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2061       Abstract_VM_Version::_detected_virtualization = XenHVM;
2062     }
2063   }
2064 }
2065 
#ifdef COMPILER2
// Returns true when running on Cascade Lake with default AVX-related options,
// i.e. neither UseAVX nor MaxVectorSize was set on the command line and the
// default AVX level is above AVX2.
bool VM_Version::is_default_intel_cascade_lake() {
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  return UseAVX > 2 && is_intel_cascade_lake();
}
#endif
2075 
2076 bool VM_Version::is_intel_cascade_lake() {
2077   return is_intel_skylake() && _stepping >= 5;
2078 }
2079 
2080 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2081 // for implementing the array copy and clear operations.
2082 // The Intel platforms that supports the serialize instruction
2083 // has improved implementation of 64-byte load/stores and so the default
2084 // threshold is set to 0 for these platforms.
2085 int VM_Version::avx3_threshold() {
2086   return (is_intel_server_family() &&
2087           supports_serialize() &&
2088           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2089 }
2090 
// Clear the CPU state left behind by the APX probe by invoking the
// generated clear_apx_test_state stub (installed in VM_Version::initialize()).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2094 
// Set once VM_Version::initialize() has completed.
static bool _vm_version_initialized = false;

// One-time VM_Version setup: generate the cpuid/virtualization/APX stubs,
// then probe and configure all processor features.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate and install the three probe stubs used by this class:
  // cpuid feature probing, hypervisor-leaf detection, and APX state clearing.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  // Probe cpuid and derive all feature flags and ergonomic defaults.
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2123 
// x86 family ids as reported in the family field of cpuid leaf 1.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Selected feature bits from the extended cpuid leaf's edx register.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// Feature bits from cpuid leaf 1's edx register, one bit per flag.
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2169 
// Stub blob and entry point used to fetch the CPU brand string via cpuid
// (generated lazily by VM_Version::initialize_tsc()).
static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
// Lengths of the vendor-specific extended-family-id name tables below.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

const size_t VENDOR_LENGTH = 13;
// Extended brand string: 3 cpuid leaves * 4 registers * 4 chars, plus NUL.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2192 
// Human-readable names for Intel CPUs, indexed by extended family id.
// Empty strings mark family ids with no recorded name.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2211 
// Human-readable names for AMD CPUs, indexed by extended family id.
// Empty strings mark family ids with no recorded name.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Model names for family 6 (PentiumPro) CPUs, indexed by extended model id.
// The table is nullptr-terminated; empty strings mark models with no
// recorded name (see cpu_model_description(), which relies on the sentinel).
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2314 
/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
// Indexed by the legacy brand id from cpuid; nullptr-terminated.
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2329 
2330 
// Names for cpuid leaf 1 edx feature bits, indexed by bit position
// (cf. FeatureEdxFlag above). Empty strings mark reserved bits.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2365 
// Names for extended cpuid leaf edx feature bits, indexed by bit position
// (cf. _featureExtendedEdxFlag above). Empty strings mark reserved bits.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2400 
// Names for cpuid leaf 1 ecx feature bits, indexed by bit position.
// Empty strings mark reserved bits.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2435 
// Human-readable names for the bits of the extended-function CPUID ECX
// register (ext_cpuid1_ecx), indexed by bit position 0..31. Empty strings
// mark bits that cpu_write_support_string() does not report.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2470 
2471 void VM_Version::initialize_tsc(void) {
2472   ResourceMark rm;
2473 
2474   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2475   if (cpuid_brand_string_stub_blob == nullptr) {
2476     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2477   }
2478   CodeBuffer c(cpuid_brand_string_stub_blob);
2479   VM_Version_StubGenerator g(&c);
2480   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2481                                    g.generate_getCPUIDBrandString());
2482 }
2483 
2484 const char* VM_Version::cpu_model_description(void) {
2485   uint32_t cpu_family = extended_cpu_family();
2486   uint32_t cpu_model = extended_cpu_model();
2487   const char* model = nullptr;
2488 
2489   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2490     for (uint32_t i = 0; i <= cpu_model; i++) {
2491       model = _model_id_pentium_pro[i];
2492       if (model == nullptr) {
2493         break;
2494       }
2495     }
2496   }
2497   return model;
2498 }
2499 
2500 const char* VM_Version::cpu_brand_string(void) {
2501   if (_cpu_brand_string == nullptr) {
2502     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2503     if (nullptr == _cpu_brand_string) {
2504       return nullptr;
2505     }
2506     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2507     if (ret_val != OS_OK) {
2508       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2509       _cpu_brand_string = nullptr;
2510     }
2511   }
2512   return _cpu_brand_string;
2513 }
2514 
2515 const char* VM_Version::cpu_brand(void) {
2516   const char*  brand  = nullptr;
2517 
2518   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2519     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2520     brand = _brand_id[0];
2521     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2522       brand = _brand_id[i];
2523     }
2524   }
2525   return brand;
2526 }
2527 
2528 bool VM_Version::cpu_is_em64t(void) {
2529   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2530 }
2531 
2532 bool VM_Version::is_netburst(void) {
2533   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2534 }
2535 
2536 bool VM_Version::supports_tscinv_ext(void) {
2537   if (!supports_tscinv_bit()) {
2538     return false;
2539   }
2540 
2541   if (is_intel()) {
2542     return true;
2543   }
2544 
2545   if (is_amd()) {
2546     return !is_amd_Barcelona();
2547   }
2548 
2549   if (is_hygon()) {
2550     return true;
2551   }
2552 
2553   return false;
2554 }
2555 
2556 void VM_Version::resolve_cpu_information_details(void) {
2557 
2558   // in future we want to base this information on proper cpu
2559   // and cache topology enumeration such as:
2560   // Intel 64 Architecture Processor Topology Enumeration
2561   // which supports system cpu and cache topology enumeration
2562   // either using 2xAPICIDs or initial APICIDs
2563 
2564   // currently only rough cpu information estimates
2565   // which will not necessarily reflect the exact configuration of the system
2566 
2567   // this is the number of logical hardware threads
2568   // visible to the operating system
2569   _no_of_threads = os::processor_count();
2570 
2571   // find out number of threads per cpu package
2572   int threads_per_package = threads_per_core() * cores_per_cpu();
2573 
2574   // use amount of threads visible to the process in order to guess number of sockets
2575   _no_of_sockets = _no_of_threads / threads_per_package;
2576 
2577   // process might only see a subset of the total number of threads
2578   // from a single processor package. Virtualization/resource management for example.
2579   // If so then just write a hard 1 as num of pkgs.
2580   if (0 == _no_of_sockets) {
2581     _no_of_sockets = 1;
2582   }
2583 
2584   // estimate the number of cores
2585   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2586 }
2587 
2588 
2589 const char* VM_Version::cpu_family_description(void) {
2590   int cpu_family_id = extended_cpu_family();
2591   if (is_amd()) {
2592     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2593       return _family_id_amd[cpu_family_id];
2594     }
2595   }
2596   if (is_intel()) {
2597     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2598       return cpu_model_description();
2599     }
2600     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2601       return _family_id_intel[cpu_family_id];
2602     }
2603   }
2604   if (is_hygon()) {
2605     return "Dhyana";
2606   }
2607   return "Unknown x86";
2608 }
2609 
2610 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2611   assert(buf != nullptr, "buffer is null!");
2612   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2613 
2614   const char* cpu_type = nullptr;
2615   const char* x64 = nullptr;
2616 
2617   if (is_intel()) {
2618     cpu_type = "Intel";
2619     x64 = cpu_is_em64t() ? " Intel64" : "";
2620   } else if (is_amd()) {
2621     cpu_type = "AMD";
2622     x64 = cpu_is_em64t() ? " AMD64" : "";
2623   } else if (is_hygon()) {
2624     cpu_type = "Hygon";
2625     x64 = cpu_is_em64t() ? " AMD64" : "";
2626   } else {
2627     cpu_type = "Unknown x86";
2628     x64 = cpu_is_em64t() ? " x86_64" : "";
2629   }
2630 
2631   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2632     cpu_type,
2633     cpu_family_description(),
2634     supports_ht() ? " (HT)" : "",
2635     supports_sse3() ? " SSE3" : "",
2636     supports_ssse3() ? " SSSE3" : "",
2637     supports_sse4_1() ? " SSE4.1" : "",
2638     supports_sse4_2() ? " SSE4.2" : "",
2639     supports_sse4a() ? " SSE4A" : "",
2640     is_netburst() ? " Netburst" : "",
2641     is_intel_family_core() ? " Core" : "",
2642     x64);
2643 
2644   return OS_OK;
2645 }
2646 
// Fill 'buf' with the 48-byte CPU brand string obtained from the generated
// CPUID stub (created by initialize_tsc(); asserted below). The twelve
// 4-byte stores below reassemble CPUID leaves 0x80000002..4 in order.
// Always returns OS_OK.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer
  // NOTE(review): these word stores type-pun through a char buffer and
  // assume unaligned uint32_t stores are acceptable (true on x86) --
  // confirm before reusing on a stricter target.
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}
2671 
// Append a comma-separated list of supported CPU feature names to 'buf',
// scanning the four feature-name tables against the corresponding CPUID
// registers. Returns the number of characters written (or buf_len - 1 if
// the buffer filled up / a formatting error occurred).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

// Append 'prefix' + 'string' at the current write position. On formatting
// failure, bail out of the whole function with buf_len - 1. After the
// first successful write, switch the prefix to ", " so subsequent entries
// are comma-separated.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // Each loop walks bits 0..29 (flag stops after 0x20000000) of one CPUID
  // register, emitting the table's non-empty name for each set bit.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    // Suppress HTT when the reported logical processor count is <= 1.
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}
2728 
2729 /**
2730  * Write a detailed description of the cpu to a given buffer, including
2731  * feature set.
2732  */
2733 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2734   assert(buf != nullptr, "buffer is null!");
2735   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2736 
2737   static const char* unknown = "<unknown>";
2738   char               vendor_id[VENDOR_LENGTH];
2739   const char*        family = nullptr;
2740   const char*        model = nullptr;
2741   const char*        brand = nullptr;
2742   int                outputLen = 0;
2743 
2744   family = cpu_family_description();
2745   if (family == nullptr) {
2746     family = unknown;
2747   }
2748 
2749   model = cpu_model_description();
2750   if (model == nullptr) {
2751     model = unknown;
2752   }
2753 
2754   brand = cpu_brand_string();
2755 
2756   if (brand == nullptr) {
2757     brand = cpu_brand();
2758     if (brand == nullptr) {
2759       brand = unknown;
2760     }
2761   }
2762 
2763   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2764   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2765   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2766   vendor_id[VENDOR_LENGTH-1] = '\0';
2767 
2768   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2769     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2770     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2771     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2772     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2773     "Supports: ",
2774     brand,
2775     vendor_id,
2776     family,
2777     extended_cpu_family(),
2778     model,
2779     extended_cpu_model(),
2780     cpu_stepping(),
2781     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2782     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2783     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2784     _cpuid_info.std_cpuid1_eax.value,
2785     _cpuid_info.std_cpuid1_ebx.value,
2786     _cpuid_info.std_cpuid1_ecx.value,
2787     _cpuid_info.std_cpuid1_edx.value,
2788     _cpuid_info.ext_cpuid1_eax,
2789     _cpuid_info.ext_cpuid1_ebx,
2790     _cpuid_info.ext_cpuid1_ecx,
2791     _cpuid_info.ext_cpuid1_edx);
2792 
2793   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2794     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2795     return OS_ERR;
2796   }
2797 
2798   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2799 
2800   return OS_OK;
2801 }
2802 
2803 
// Fill in Abstract_VM_Version statics: thread/socket/core counts plus the
// cached _cpu_name and _cpu_desc strings. Must run exactly once, after
// VM_Version itself has been initialized (both asserted below).
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2815 
2816 /**
2817  *  For information about extracting the frequency from the cpu brand string, please see:
2818  *
2819  *    Intel Processor Identification and the CPUID Instruction
2820  *    Application Note 485
2821  *    May 2012
2822  *
2823  * The return value is the frequency in Hz.
2824  */
2825 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2826   const char* const brand_string = cpu_brand_string();
2827   if (brand_string == nullptr) {
2828     return 0;
2829   }
2830   const int64_t MEGA = 1000000;
2831   int64_t multiplier = 0;
2832   int64_t frequency = 0;
2833   uint8_t idx = 0;
2834   // The brand string buffer is at most 48 bytes.
2835   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2836   for (; idx < 48-2; ++idx) {
2837     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2838     // Search brand string for "yHz" where y is M, G, or T.
2839     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2840       if (brand_string[idx] == 'M') {
2841         multiplier = MEGA;
2842       } else if (brand_string[idx] == 'G') {
2843         multiplier = MEGA * 1000;
2844       } else if (brand_string[idx] == 'T') {
2845         multiplier = MEGA * MEGA;
2846       }
2847       break;
2848     }
2849   }
2850   if (multiplier > 0) {
2851     // Compute frequency (in Hz) from brand string.
2852     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2853       frequency =  (brand_string[idx-4] - '0') * multiplier;
2854       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2855       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2856     } else { // format is "xxxx"
2857       frequency =  (brand_string[idx-4] - '0') * 1000;
2858       frequency += (brand_string[idx-3] - '0') * 100;
2859       frequency += (brand_string[idx-2] - '0') * 10;
2860       frequency += (brand_string[idx-1] - '0');
2861       frequency *= multiplier;
2862     }
2863   }
2864   return frequency;
2865 }
2866 
2867 
2868 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2869   if (_max_qualified_cpu_frequency == 0) {
2870     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2871   }
2872   return _max_qualified_cpu_frequency;
2873 }
2874 
2875 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2876   VM_Features vm_features;
2877   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2878     vm_features.set_feature(CPU_CX8);
2879   if (std_cpuid1_edx.bits.cmov != 0)
2880     vm_features.set_feature(CPU_CMOV);
2881   if (std_cpuid1_edx.bits.clflush != 0)
2882     vm_features.set_feature(CPU_FLUSH);
2883   // clflush should always be available on x86_64
2884   // if not we are in real trouble because we rely on it
2885   // to flush the code cache.
2886   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2887   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2888       ext_cpuid1_edx.bits.fxsr != 0))
2889     vm_features.set_feature(CPU_FXSR);
2890   // HT flag is set for multi-core processors also.
2891   if (threads_per_core() > 1)
2892     vm_features.set_feature(CPU_HT);
2893   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2894       ext_cpuid1_edx.bits.mmx != 0))
2895     vm_features.set_feature(CPU_MMX);
2896   if (std_cpuid1_edx.bits.sse != 0)
2897     vm_features.set_feature(CPU_SSE);
2898   if (std_cpuid1_edx.bits.sse2 != 0)
2899     vm_features.set_feature(CPU_SSE2);
2900   if (std_cpuid1_ecx.bits.sse3 != 0)
2901     vm_features.set_feature(CPU_SSE3);
2902   if (std_cpuid1_ecx.bits.ssse3 != 0)
2903     vm_features.set_feature(CPU_SSSE3);
2904   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2905     vm_features.set_feature(CPU_SSE4_1);
2906   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2907     vm_features.set_feature(CPU_SSE4_2);
2908   if (std_cpuid1_ecx.bits.popcnt != 0)
2909     vm_features.set_feature(CPU_POPCNT);
2910   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2911       xem_xcr0_eax.bits.apx_f != 0) {
2912     vm_features.set_feature(CPU_APX_F);
2913   }
2914   if (std_cpuid1_ecx.bits.avx != 0 &&
2915       std_cpuid1_ecx.bits.osxsave != 0 &&
2916       xem_xcr0_eax.bits.sse != 0 &&
2917       xem_xcr0_eax.bits.ymm != 0) {
2918     vm_features.set_feature(CPU_AVX);
2919     vm_features.set_feature(CPU_VZEROUPPER);
2920     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2921       vm_features.set_feature(CPU_SHA512);
2922     if (std_cpuid1_ecx.bits.f16c != 0)
2923       vm_features.set_feature(CPU_F16C);
2924     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2925       vm_features.set_feature(CPU_AVX2);
2926       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2927         vm_features.set_feature(CPU_AVX_IFMA);
2928     }
2929     if (sef_cpuid7_ecx.bits.gfni != 0)
2930         vm_features.set_feature(CPU_GFNI);
2931     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2932         xem_xcr0_eax.bits.opmask != 0 &&
2933         xem_xcr0_eax.bits.zmm512 != 0 &&
2934         xem_xcr0_eax.bits.zmm32 != 0) {
2935       vm_features.set_feature(CPU_AVX512F);
2936       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2937         vm_features.set_feature(CPU_AVX512CD);
2938       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2939         vm_features.set_feature(CPU_AVX512DQ);
2940       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2941         vm_features.set_feature(CPU_AVX512_IFMA);
2942       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2943         vm_features.set_feature(CPU_AVX512PF);
2944       if (sef_cpuid7_ebx.bits.avx512er != 0)
2945         vm_features.set_feature(CPU_AVX512ER);
2946       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2947         vm_features.set_feature(CPU_AVX512BW);
2948       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2949         vm_features.set_feature(CPU_AVX512VL);
2950       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2951         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2952       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2953         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2954       if (sef_cpuid7_ecx.bits.vaes != 0)
2955         vm_features.set_feature(CPU_AVX512_VAES);
2956       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2957         vm_features.set_feature(CPU_AVX512_VNNI);
2958       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2959         vm_features.set_feature(CPU_AVX512_BITALG);
2960       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2961         vm_features.set_feature(CPU_AVX512_VBMI);
2962       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2963         vm_features.set_feature(CPU_AVX512_VBMI2);
2964     }
2965     if (is_intel()) {
2966       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2967           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2968           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2969           xem_xcr0_eax.bits.opmask != 0 &&
2970           xem_xcr0_eax.bits.zmm512 != 0 &&
2971           xem_xcr0_eax.bits.zmm32 != 0) {
2972         vm_features.set_feature(CPU_AVX10_1);
2973         vm_features.set_feature(CPU_AVX512F);
2974         vm_features.set_feature(CPU_AVX512CD);
2975         vm_features.set_feature(CPU_AVX512DQ);
2976         vm_features.set_feature(CPU_AVX512PF);
2977         vm_features.set_feature(CPU_AVX512ER);
2978         vm_features.set_feature(CPU_AVX512BW);
2979         vm_features.set_feature(CPU_AVX512VL);
2980         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2981         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2982         vm_features.set_feature(CPU_AVX512_VAES);
2983         vm_features.set_feature(CPU_AVX512_VNNI);
2984         vm_features.set_feature(CPU_AVX512_BITALG);
2985         vm_features.set_feature(CPU_AVX512_VBMI);
2986         vm_features.set_feature(CPU_AVX512_VBMI2);
2987         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2988           vm_features.set_feature(CPU_AVX10_2);
2989         }
2990       }
2991     }
2992   }
2993 
2994   if (std_cpuid1_ecx.bits.hv != 0)
2995     vm_features.set_feature(CPU_HV);
2996   if (sef_cpuid7_ebx.bits.bmi1 != 0)
2997     vm_features.set_feature(CPU_BMI1);
2998   if (std_cpuid1_edx.bits.tsc != 0)
2999     vm_features.set_feature(CPU_TSC);
3000   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3001     vm_features.set_feature(CPU_TSCINV_BIT);
3002   if (std_cpuid1_ecx.bits.aes != 0)
3003     vm_features.set_feature(CPU_AES);
3004   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3005     vm_features.set_feature(CPU_LZCNT);
3006   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3007     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3008   if (sef_cpuid7_ebx.bits.erms != 0)
3009     vm_features.set_feature(CPU_ERMS);
3010   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3011     vm_features.set_feature(CPU_FSRM);
3012   if (std_cpuid1_ecx.bits.clmul != 0)
3013     vm_features.set_feature(CPU_CLMUL);
3014   if (sef_cpuid7_ebx.bits.rtm != 0)
3015     vm_features.set_feature(CPU_RTM);
3016   if (sef_cpuid7_ebx.bits.adx != 0)
3017      vm_features.set_feature(CPU_ADX);
3018   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3019     vm_features.set_feature(CPU_BMI2);
3020   if (sef_cpuid7_ebx.bits.sha != 0)
3021     vm_features.set_feature(CPU_SHA);
3022   if (std_cpuid1_ecx.bits.fma != 0)
3023     vm_features.set_feature(CPU_FMA);
3024   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3025     vm_features.set_feature(CPU_FLUSHOPT);
3026   if (sef_cpuid7_ebx.bits.clwb != 0)
3027     vm_features.set_feature(CPU_CLWB);
3028   if (ext_cpuid1_edx.bits.rdtscp != 0)
3029     vm_features.set_feature(CPU_RDTSCP);
3030   if (sef_cpuid7_ecx.bits.rdpid != 0)
3031     vm_features.set_feature(CPU_RDPID);
3032 
3033   // AMD|Hygon additional features.
3034   if (is_amd_family()) {
3035     // PREFETCHW was checked above, check TDNOW here.
3036     if ((ext_cpuid1_edx.bits.tdnow != 0))
3037       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3038     if (ext_cpuid1_ecx.bits.sse4a != 0)
3039       vm_features.set_feature(CPU_SSE4A);
3040   }
3041 
3042   // Intel additional features.
3043   if (is_intel()) {
3044     if (sef_cpuid7_edx.bits.serialize != 0)
3045       vm_features.set_feature(CPU_SERIALIZE);
3046     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3047       vm_features.set_feature(CPU_AVX512_FP16);
3048   }
3049 
3050   // ZX additional features.
3051   if (is_zx()) {
3052     // We do not know if these are supported by ZX, so we cannot trust
3053     // common CPUID bit for them.
3054     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3055     vm_features.clear_feature(CPU_CLWB);
3056   }
3057 
3058   // Protection key features.
3059   if (sef_cpuid7_ecx.bits.pku != 0) {
3060     vm_features.set_feature(CPU_PKU);
3061   }
3062   if (sef_cpuid7_ecx.bits.ospke != 0) {
3063     vm_features.set_feature(CPU_OSPKE);
3064   }
3065 
3066   // Control flow enforcement (CET) features.
3067   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3068     vm_features.set_feature(CPU_CET_SS);
3069   }
3070   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3071     vm_features.set_feature(CPU_CET_IBT);
3072   }
3073 
3074   // Composite features.
3075   if (supports_tscinv_bit() &&
3076       ((is_amd_family() && !is_amd_Barcelona()) ||
3077        is_intel_tsc_synched_at_init())) {
3078     vm_features.set_feature(CPU_TSCINV);
3079   }
3080   return vm_features;
3081 }
3082 
3083 bool VM_Version::os_supports_avx_vectors() {
3084   bool retVal = false;
3085   int nreg = 4;
3086   if (supports_evex()) {
3087     // Verify that OS save/restore all bits of EVEX registers
3088     // during signal processing.
3089     retVal = true;
3090     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3091       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3092         retVal = false;
3093         break;
3094       }
3095     }
3096   } else if (supports_avx()) {
3097     // Verify that OS save/restore all bits of AVX registers
3098     // during signal processing.
3099     retVal = true;
3100     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3101       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3102         retVal = false;
3103         break;
3104       }
3105     }
3106     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3107     if (retVal == false) {
3108       // Verify that OS save/restore all bits of EVEX registers
3109       // during signal processing.
3110       retVal = true;
3111       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3112         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3113           retVal = false;
3114           break;
3115         }
3116       }
3117     }
3118   }
3119   return retVal;
3120 }
3121 
3122 bool VM_Version::os_supports_apx_egprs() {
3123   if (!supports_apx_f()) {
3124     return false;
3125   }
3126   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3127       _cpuid_info.apx_save[1] != egpr_test_value()) {
3128     return false;
3129   }
3130   return true;
3131 }
3132 
3133 uint VM_Version::cores_per_cpu() {
3134   uint result = 1;
3135   if (is_intel()) {
3136     bool supports_topology = supports_processor_topology();
3137     if (supports_topology) {
3138       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3139                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3140     }
3141     if (!supports_topology || result == 0) {
3142       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3143     }
3144   } else if (is_amd_family()) {
3145     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3146   } else if (is_zx()) {
3147     bool supports_topology = supports_processor_topology();
3148     if (supports_topology) {
3149       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3150                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3151     }
3152     if (!supports_topology || result == 0) {
3153       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3154     }
3155   }
3156   return result;
3157 }
3158 
3159 uint VM_Version::threads_per_core() {
3160   uint result = 1;
3161   if (is_intel() && supports_processor_topology()) {
3162     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3163   } else if (is_zx() && supports_processor_topology()) {
3164     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3165   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3166     if (cpu_family() >= 0x17) {
3167       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3168     } else {
3169       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3170                  cores_per_cpu();
3171     }
3172   }
3173   return (result == 0 ? 1 : result);
3174 }
3175 
3176 uint VM_Version::L1_line_size() {
3177   uint result = 0;
3178   if (is_intel()) {
3179     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3180   } else if (is_amd_family()) {
3181     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3182   } else if (is_zx()) {
3183     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3184   }
3185   if (result < 32) // not defined ?
3186     result = 32;   // 32 bytes by default on x86 and other x64
3187   return result;
3188 }
3189 
3190 bool VM_Version::is_intel_tsc_synched_at_init() {
3191   if (is_intel_family_core()) {
3192     uint32_t ext_model = extended_cpu_model();
3193     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3194         ext_model == CPU_MODEL_WESTMERE_EP    ||
3195         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3196         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3197       // <= 2-socket invariant tsc support. EX versions are usually used
3198       // in > 2-socket systems and likely don't synchronize tscs at
3199       // initialization.
3200       // Code that uses tsc values must be prepared for them to arbitrarily
3201       // jump forward or backward.
3202       return true;
3203     }
3204   }
3205   return false;
3206 }
3207 
3208 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3209   // Hardware prefetching (distance/size in bytes):
3210   // Pentium 3 -  64 /  32
3211   // Pentium 4 - 256 / 128
3212   // Athlon    -  64 /  32 ????
3213   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3214   // Core      - 128 /  64
3215   //
3216   // Software prefetching (distance in bytes / instruction with best score):
3217   // Pentium 3 - 128 / prefetchnta
3218   // Pentium 4 - 512 / prefetchnta
3219   // Athlon    - 128 / prefetchnta
3220   // Opteron   - 256 / prefetchnta
3221   // Core      - 256 / prefetchnta
3222   // It will be used only when AllocatePrefetchStyle > 0
3223 
3224   if (is_amd_family()) { // AMD | Hygon
3225     if (supports_sse2()) {
3226       return 256; // Opteron
3227     } else {
3228       return 128; // Athlon
3229     }
3230   } else { // Intel
3231     if (supports_sse3() && is_intel_server_family()) {
3232       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3233         return 192;
3234       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3235         return 384;
3236       }
3237     }
3238     if (supports_sse2()) {
3239       if (is_intel_server_family()) {
3240         return 256; // Pentium M, Core, Core2
3241       } else {
3242         return 512; // Pentium 4
3243       }
3244     } else {
3245       return 128; // Pentium 3 (and all other old CPUs)
3246     }
3247   }
3248 }
3249 
3250 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3251   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3252   switch (id) {
3253   case vmIntrinsics::_floatToFloat16:
3254   case vmIntrinsics::_float16ToFloat:
3255     if (!supports_float16()) {
3256       return false;
3257     }
3258     break;
3259   default:
3260     break;
3261   }
3262   return true;
3263 }
3264 
3265 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3266   int i = 0;
3267   ss.join([&]() {
3268     while (i < MAX_CPU_FEATURES) {
3269       if (_features.supports_feature((VM_Version::Feature_Flag)i)) {
3270         return _features_names[i++];
3271       }
3272       i += 1;
3273     }
3274     return (const char*)nullptr;
3275   }, ", ");
3276 }