1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_FLAG
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = nullptr;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = nullptr;
  59 // Address of instruction which causes APX specific SEGV
  60 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
  61 // Address of instruction after the one which causes APX specific SEGV
  62 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
  63 
  64 static BufferBlob* stub_blob;
  65 static const int stub_size = 2000;
  66 
  67 int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;
  68 
  69 VM_Version::VM_Features VM_Version::_features;
  70 VM_Version::VM_Features VM_Version::_cpu_features;
  71 
  72 extern "C" {
  73   typedef void (*get_cpu_info_stub_t)(void*);
  74   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  75   typedef void (*clear_apx_test_state_t)(void);
  76 }
  77 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  78 static detect_virt_stub_t detect_virt_stub = nullptr;
  79 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  80 
  81 bool VM_Version::supports_clflush() {
  82   // clflush should always be available on x86_64
  83   // if not we are in real trouble because we rely on it
  84   // to flush the code cache.
  85   // Unfortunately, Assembler::clflush is currently called as part
  86   // of generation of the code cache flush routine. This happens
  87   // under Universe::init before the processor features are set
  88   // up. Assembler::flush calls this routine to check that clflush
  89   // is allowed. So, we give the caller a free pass if Universe init
  90   // is still in progress.
  91   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  92   return true;
  93 }
  94 
  95 #define CPUID_STANDARD_FN   0x0
  96 #define CPUID_STANDARD_FN_1 0x1
  97 #define CPUID_STANDARD_FN_4 0x4
  98 #define CPUID_STANDARD_FN_B 0xb
  99 
 100 #define CPUID_EXTENDED_FN   0x80000000
 101 #define CPUID_EXTENDED_FN_1 0x80000001
 102 #define CPUID_EXTENDED_FN_2 0x80000002
 103 #define CPUID_EXTENDED_FN_3 0x80000003
 104 #define CPUID_EXTENDED_FN_4 0x80000004
 105 #define CPUID_EXTENDED_FN_7 0x80000007
 106 #define CPUID_EXTENDED_FN_8 0x80000008
 107 
 108 class VM_Version_StubGenerator: public StubCodeGenerator {
 109  public:
 110 
 111   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 112 
 113   address clear_apx_test_state() {
 114 #   define __ _masm->
 115     address start = __ pc();
 116     // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
 117     // handling guarantees that preserved register values post signal handling were
 118     // re-instantiated by operating system and not because they were not modified externally.
 119 
 120     bool save_apx = UseAPX;
 121     VM_Version::set_apx_cpuFeatures();
 122     UseAPX = true;
 123     // EGPR state save/restoration.
 124     __ mov64(r16, 0L);
 125     __ mov64(r31, 0L);
 126     UseAPX = save_apx;
 127     VM_Version::clean_cpuFeatures();
 128     __ ret(0);
 129     return start;
 130   }
 131 
 132   address generate_get_cpu_info() {
 133     // Flags to test CPU type.
 134     const uint32_t HS_EFL_AC = 0x40000;
 135     const uint32_t HS_EFL_ID = 0x200000;
 136     // Values for when we don't have a CPUID instruction.
 137     const int      CPU_FAMILY_SHIFT = 8;
 138     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 139     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 140     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 141 
 142     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
 143     Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
 144     Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
 145     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 146 
 147     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 148 #   define __ _masm->
 149 
 150     address start = __ pc();
 151 
 152     //
 153     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 154     //
 155     // rcx and rdx are first and second argument registers on windows
 156 
 157     __ push(rbp);
 158     __ mov(rbp, c_rarg0); // cpuid_info address
 159     __ push(rbx);
 160     __ push(rsi);
 161     __ pushf();          // preserve rbx, and flags
 162     __ pop(rax);
 163     __ push(rax);
 164     __ mov(rcx, rax);
 165     //
 166     // if we are unable to change the AC flag, we have a 386
 167     //
 168     __ xorl(rax, HS_EFL_AC);
 169     __ push(rax);
 170     __ popf();
 171     __ pushf();
 172     __ pop(rax);
 173     __ cmpptr(rax, rcx);
 174     __ jccb(Assembler::notEqual, detect_486);
 175 
 176     __ movl(rax, CPU_FAMILY_386);
 177     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 178     __ jmp(done);
 179 
 180     //
 181     // If we are unable to change the ID flag, we have a 486 which does
 182     // not support the "cpuid" instruction.
 183     //
 184     __ bind(detect_486);
 185     __ mov(rax, rcx);
 186     __ xorl(rax, HS_EFL_ID);
 187     __ push(rax);
 188     __ popf();
 189     __ pushf();
 190     __ pop(rax);
 191     __ cmpptr(rcx, rax);
 192     __ jccb(Assembler::notEqual, detect_586);
 193 
 194     __ bind(cpu486);
 195     __ movl(rax, CPU_FAMILY_486);
 196     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 197     __ jmp(done);
 198 
 199     //
 200     // At this point, we have a chip which supports the "cpuid" instruction
 201     //
 202     __ bind(detect_586);
 203     __ xorl(rax, rax);
 204     __ cpuid();
 205     __ orl(rax, rax);
 206     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 207                                         // value of at least 1, we give up and
 208                                         // assume a 486
 209     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 210     __ movl(Address(rsi, 0), rax);
 211     __ movl(Address(rsi, 4), rbx);
 212     __ movl(Address(rsi, 8), rcx);
 213     __ movl(Address(rsi,12), rdx);
 214 
 215     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 216     __ jccb(Assembler::belowEqual, std_cpuid4);
 217 
 218     //
 219     // cpuid(0xB) Processor Topology
 220     //
 221     __ movl(rax, 0xb);
 222     __ xorl(rcx, rcx);   // Threads level
 223     __ cpuid();
 224 
 225     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 226     __ movl(Address(rsi, 0), rax);
 227     __ movl(Address(rsi, 4), rbx);
 228     __ movl(Address(rsi, 8), rcx);
 229     __ movl(Address(rsi,12), rdx);
 230 
 231     __ movl(rax, 0xb);
 232     __ movl(rcx, 1);     // Cores level
 233     __ cpuid();
 234     __ push(rax);
 235     __ andl(rax, 0x1f);  // Determine if valid topology level
 236     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 237     __ andl(rax, 0xffff);
 238     __ pop(rax);
 239     __ jccb(Assembler::equal, std_cpuid4);
 240 
 241     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 242     __ movl(Address(rsi, 0), rax);
 243     __ movl(Address(rsi, 4), rbx);
 244     __ movl(Address(rsi, 8), rcx);
 245     __ movl(Address(rsi,12), rdx);
 246 
 247     __ movl(rax, 0xb);
 248     __ movl(rcx, 2);     // Packages level
 249     __ cpuid();
 250     __ push(rax);
 251     __ andl(rax, 0x1f);  // Determine if valid topology level
 252     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 253     __ andl(rax, 0xffff);
 254     __ pop(rax);
 255     __ jccb(Assembler::equal, std_cpuid4);
 256 
 257     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 258     __ movl(Address(rsi, 0), rax);
 259     __ movl(Address(rsi, 4), rbx);
 260     __ movl(Address(rsi, 8), rcx);
 261     __ movl(Address(rsi,12), rdx);
 262 
 263     //
 264     // cpuid(0x4) Deterministic cache params
 265     //
 266     __ bind(std_cpuid4);
 267     __ movl(rax, 4);
 268     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 269     __ jccb(Assembler::greater, std_cpuid1);
 270 
 271     __ xorl(rcx, rcx);   // L1 cache
 272     __ cpuid();
 273     __ push(rax);
 274     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 275     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 276     __ pop(rax);
 277     __ jccb(Assembler::equal, std_cpuid1);
 278 
 279     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 280     __ movl(Address(rsi, 0), rax);
 281     __ movl(Address(rsi, 4), rbx);
 282     __ movl(Address(rsi, 8), rcx);
 283     __ movl(Address(rsi,12), rdx);
 284 
 285     //
 286     // Standard cpuid(0x1)
 287     //
 288     __ bind(std_cpuid1);
 289     __ movl(rax, 1);
 290     __ cpuid();
 291     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 292     __ movl(Address(rsi, 0), rax);
 293     __ movl(Address(rsi, 4), rbx);
 294     __ movl(Address(rsi, 8), rcx);
 295     __ movl(Address(rsi,12), rdx);
 296 
 297     //
 298     // Check if OS has enabled XGETBV instruction to access XCR0
 299     // (OSXSAVE feature flag) and CPU supports AVX
 300     //
 301     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 302     __ cmpl(rcx, 0x18000000);
 303     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 304 
 305     //
 306     // XCR0, XFEATURE_ENABLED_MASK register
 307     //
 308     __ xorl(rcx, rcx);   // zero for XCR0 register
 309     __ xgetbv();
 310     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 311     __ movl(Address(rsi, 0), rax);
 312     __ movl(Address(rsi, 4), rdx);
 313 
 314     //
 315     // cpuid(0x7) Structured Extended Features Enumeration Leaf.
 316     //
 317     __ bind(sef_cpuid);
 318     __ movl(rax, 7);
 319     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 320     __ jccb(Assembler::greater, ext_cpuid);
 321     // ECX = 0
 322     __ xorl(rcx, rcx);
 323     __ cpuid();
 324     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 325     __ movl(Address(rsi, 0), rax);
 326     __ movl(Address(rsi, 4), rbx);
 327     __ movl(Address(rsi, 8), rcx);
 328     __ movl(Address(rsi, 12), rdx);
 329 
 330     //
 331     // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
 332     //
 333     __ bind(sefsl1_cpuid);
 334     __ movl(rax, 7);
 335     __ movl(rcx, 1);
 336     __ cpuid();
 337     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 338     __ movl(Address(rsi, 0), rax);
 339     __ movl(Address(rsi, 4), rdx);
 340 
 341     //
 342     // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
 343     //
 344     __ bind(std_cpuid24);
 345     __ movl(rax, 0x24);
 346     __ movl(rcx, 0);
 347     __ cpuid();
 348     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
 349     __ movl(Address(rsi, 0), rax);
 350     __ movl(Address(rsi, 4), rbx);
 351 
 352     //
 353     // Extended cpuid(0x80000000)
 354     //
 355     __ bind(ext_cpuid);
 356     __ movl(rax, 0x80000000);
 357     __ cpuid();
 358     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 359     __ jcc(Assembler::belowEqual, done);
 360     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 361     __ jcc(Assembler::belowEqual, ext_cpuid1);
 362     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 363     __ jccb(Assembler::belowEqual, ext_cpuid5);
 364     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 365     __ jccb(Assembler::belowEqual, ext_cpuid7);
 366     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 367     __ jccb(Assembler::belowEqual, ext_cpuid8);
 368     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 369     __ jccb(Assembler::below, ext_cpuid8);
 370     //
 371     // Extended cpuid(0x8000001E)
 372     //
 373     __ movl(rax, 0x8000001E);
 374     __ cpuid();
 375     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 376     __ movl(Address(rsi, 0), rax);
 377     __ movl(Address(rsi, 4), rbx);
 378     __ movl(Address(rsi, 8), rcx);
 379     __ movl(Address(rsi,12), rdx);
 380 
 381     //
 382     // Extended cpuid(0x80000008)
 383     //
 384     __ bind(ext_cpuid8);
 385     __ movl(rax, 0x80000008);
 386     __ cpuid();
 387     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 388     __ movl(Address(rsi, 0), rax);
 389     __ movl(Address(rsi, 4), rbx);
 390     __ movl(Address(rsi, 8), rcx);
 391     __ movl(Address(rsi,12), rdx);
 392 
 393     //
 394     // Extended cpuid(0x80000007)
 395     //
 396     __ bind(ext_cpuid7);
 397     __ movl(rax, 0x80000007);
 398     __ cpuid();
 399     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 400     __ movl(Address(rsi, 0), rax);
 401     __ movl(Address(rsi, 4), rbx);
 402     __ movl(Address(rsi, 8), rcx);
 403     __ movl(Address(rsi,12), rdx);
 404 
 405     //
 406     // Extended cpuid(0x80000005)
 407     //
 408     __ bind(ext_cpuid5);
 409     __ movl(rax, 0x80000005);
 410     __ cpuid();
 411     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 412     __ movl(Address(rsi, 0), rax);
 413     __ movl(Address(rsi, 4), rbx);
 414     __ movl(Address(rsi, 8), rcx);
 415     __ movl(Address(rsi,12), rdx);
 416 
 417     //
 418     // Extended cpuid(0x80000001)
 419     //
 420     __ bind(ext_cpuid1);
 421     __ movl(rax, 0x80000001);
 422     __ cpuid();
 423     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 424     __ movl(Address(rsi, 0), rax);
 425     __ movl(Address(rsi, 4), rbx);
 426     __ movl(Address(rsi, 8), rcx);
 427     __ movl(Address(rsi,12), rdx);
 428 
 429     //
 430     // Check if OS has enabled XGETBV instruction to access XCR0
 431     // (OSXSAVE feature flag) and CPU supports APX
 432     //
 433     // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
 434     // and XCRO[19] bit for OS support to save/restore extended GPR state.
 435     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 436     __ movl(rax, 0x200000);
 437     __ andl(rax, Address(rsi, 4));
 438     __ jcc(Assembler::equal, vector_save_restore);
 439     // check _cpuid_info.xem_xcr0_eax.bits.apx_f
 440     __ movl(rax, 0x80000);
 441     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
 442     __ jcc(Assembler::equal, vector_save_restore);
 443 
 444 #ifndef PRODUCT
 445     bool save_apx = UseAPX;
 446     VM_Version::set_apx_cpuFeatures();
 447     UseAPX = true;
 448     __ mov64(r16, VM_Version::egpr_test_value());
 449     __ mov64(r31, VM_Version::egpr_test_value());
 450     __ xorl(rsi, rsi);
 451     VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
 452     // Generate SEGV
 453     __ movl(rax, Address(rsi, 0));
 454 
 455     VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
 456     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
 457     __ movq(Address(rsi, 0), r16);
 458     __ movq(Address(rsi, 8), r31);
 459 
 460     UseAPX = save_apx;
 461 #endif
 462     __ bind(vector_save_restore);
 463     //
 464     // Check if OS has enabled XGETBV instruction to access XCR0
 465     // (OSXSAVE feature flag) and CPU supports AVX
 466     //
 467     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 468     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 469     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 470     __ cmpl(rcx, 0x18000000);
 471     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 472 
 473     __ movl(rax, 0x6);
 474     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 475     __ cmpl(rax, 0x6);
 476     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 477 
 478     // we need to bridge farther than imm8, so we use this island as a thunk
 479     __ bind(done);
 480     __ jmp(wrapup);
 481 
 482     __ bind(start_simd_check);
 483     //
 484     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 485     // registers are not restored after a signal processing.
 486     // Generate SEGV here (reference through null)
 487     // and check upper YMM/ZMM bits after it.
 488     //
 489     int saved_useavx = UseAVX;
 490     int saved_usesse = UseSSE;
 491 
 492     // If UseAVX is uninitialized or is set by the user to include EVEX
 493     if (use_evex) {
 494       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 495       // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
 496       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 497       __ movl(rax, 0x10000);
 498       __ andl(rax, Address(rsi, 4));
 499       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 500       __ movl(rbx, 0x80000);
 501       __ andl(rbx, Address(rsi, 4));
 502       __ orl(rax, rbx);
 503       __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
 504       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 505       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 506       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 507       __ movl(rax, 0xE0);
 508       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 509       __ cmpl(rax, 0xE0);
 510       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 511 
 512       if (FLAG_IS_DEFAULT(UseAVX)) {
 513         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 514         __ movl(rax, Address(rsi, 0));
 515         __ cmpl(rax, 0x50654);              // If it is Skylake
 516         __ jcc(Assembler::equal, legacy_setup);
 517       }
 518       // EVEX setup: run in lowest evex mode
 519       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 520       UseAVX = 3;
 521       UseSSE = 2;
 522 #ifdef _WINDOWS
 523       // xmm5-xmm15 are not preserved by caller on windows
 524       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 525       __ subptr(rsp, 64);
 526       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 527       __ subptr(rsp, 64);
 528       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 529       __ subptr(rsp, 64);
 530       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 531 #endif // _WINDOWS
 532 
 533       // load value into all 64 bytes of zmm7 register
 534       __ movl(rcx, VM_Version::ymm_test_value());
 535       __ movdl(xmm0, rcx);
 536       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 537       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 538       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 539       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 540       VM_Version::clean_cpuFeatures();
 541       __ jmp(save_restore_except);
 542     }
 543 
 544     __ bind(legacy_setup);
 545     // AVX setup
 546     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 547     UseAVX = 1;
 548     UseSSE = 2;
 549 #ifdef _WINDOWS
 550     __ subptr(rsp, 32);
 551     __ vmovdqu(Address(rsp, 0), xmm7);
 552     __ subptr(rsp, 32);
 553     __ vmovdqu(Address(rsp, 0), xmm8);
 554     __ subptr(rsp, 32);
 555     __ vmovdqu(Address(rsp, 0), xmm15);
 556 #endif // _WINDOWS
 557 
 558     // load value into all 32 bytes of ymm7 register
 559     __ movl(rcx, VM_Version::ymm_test_value());
 560 
 561     __ movdl(xmm0, rcx);
 562     __ pshufd(xmm0, xmm0, 0x00);
 563     __ vinsertf128_high(xmm0, xmm0);
 564     __ vmovdqu(xmm7, xmm0);
 565     __ vmovdqu(xmm8, xmm0);
 566     __ vmovdqu(xmm15, xmm0);
 567     VM_Version::clean_cpuFeatures();
 568 
 569     __ bind(save_restore_except);
 570     __ xorl(rsi, rsi);
 571     VM_Version::set_cpuinfo_segv_addr(__ pc());
 572     // Generate SEGV
 573     __ movl(rax, Address(rsi, 0));
 574 
 575     VM_Version::set_cpuinfo_cont_addr(__ pc());
 576     // Returns here after signal. Save xmm0 to check it later.
 577 
 578     // If UseAVX is uninitialized or is set by the user to include EVEX
 579     if (use_evex) {
 580       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 581       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 582       __ movl(rax, 0x10000);
 583       __ andl(rax, Address(rsi, 4));
 584       __ jcc(Assembler::equal, legacy_save_restore);
 585       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 586       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 587       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 588       __ movl(rax, 0xE0);
 589       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 590       __ cmpl(rax, 0xE0);
 591       __ jcc(Assembler::notEqual, legacy_save_restore);
 592 
 593       if (FLAG_IS_DEFAULT(UseAVX)) {
 594         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 595         __ movl(rax, Address(rsi, 0));
 596         __ cmpl(rax, 0x50654);              // If it is Skylake
 597         __ jcc(Assembler::equal, legacy_save_restore);
 598       }
 599       // EVEX check: run in lowest evex mode
 600       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 601       UseAVX = 3;
 602       UseSSE = 2;
 603       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 604       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 605       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 606       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 607       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 608 
 609 #ifdef _WINDOWS
 610       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 611       __ addptr(rsp, 64);
 612       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 613       __ addptr(rsp, 64);
 614       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 615       __ addptr(rsp, 64);
 616 #endif // _WINDOWS
 617       generate_vzeroupper(wrapup);
 618       VM_Version::clean_cpuFeatures();
 619       UseAVX = saved_useavx;
 620       UseSSE = saved_usesse;
 621       __ jmp(wrapup);
 622    }
 623 
 624     __ bind(legacy_save_restore);
 625     // AVX check
 626     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 627     UseAVX = 1;
 628     UseSSE = 2;
 629     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 630     __ vmovdqu(Address(rsi, 0), xmm0);
 631     __ vmovdqu(Address(rsi, 32), xmm7);
 632     __ vmovdqu(Address(rsi, 64), xmm8);
 633     __ vmovdqu(Address(rsi, 96), xmm15);
 634 
 635 #ifdef _WINDOWS
 636     __ vmovdqu(xmm15, Address(rsp, 0));
 637     __ addptr(rsp, 32);
 638     __ vmovdqu(xmm8, Address(rsp, 0));
 639     __ addptr(rsp, 32);
 640     __ vmovdqu(xmm7, Address(rsp, 0));
 641     __ addptr(rsp, 32);
 642 #endif // _WINDOWS
 643 
 644     generate_vzeroupper(wrapup);
 645     VM_Version::clean_cpuFeatures();
 646     UseAVX = saved_useavx;
 647     UseSSE = saved_usesse;
 648 
 649     __ bind(wrapup);
 650     __ popf();
 651     __ pop(rsi);
 652     __ pop(rbx);
 653     __ pop(rbp);
 654     __ ret(0);
 655 
 656 #   undef __
 657 
 658     return start;
 659   };
 660   void generate_vzeroupper(Label& L_wrapup) {
 661 #   define __ _masm->
 662     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 663     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 664     __ jcc(Assembler::notEqual, L_wrapup);
 665     __ movl(rcx, 0x0FFF0FF0);
 666     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 667     __ andl(rcx, Address(rsi, 0));
 668     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 669     __ jcc(Assembler::equal, L_wrapup);
 670     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 671     __ jcc(Assembler::equal, L_wrapup);
 672     // vzeroupper() will use a pre-computed instruction sequence that we
 673     // can't compute until after we've determined CPU capabilities. Use
 674     // uncached variant here directly to be able to bootstrap correctly
 675     __ vzeroupper_uncached();
 676 #   undef __
 677   }
 678   address generate_detect_virt() {
 679     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 680 #   define __ _masm->
 681 
 682     address start = __ pc();
 683 
 684     // Evacuate callee-saved registers
 685     __ push(rbp);
 686     __ push(rbx);
 687     __ push(rsi); // for Windows
 688 
 689     __ mov(rax, c_rarg0); // CPUID leaf
 690     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 691 
 692     __ cpuid();
 693 
 694     // Store result to register array
 695     __ movl(Address(rsi,  0), rax);
 696     __ movl(Address(rsi,  4), rbx);
 697     __ movl(Address(rsi,  8), rcx);
 698     __ movl(Address(rsi, 12), rdx);
 699 
 700     // Epilogue
 701     __ pop(rsi);
 702     __ pop(rbx);
 703     __ pop(rbp);
 704     __ ret(0);
 705 
 706 #   undef __
 707 
 708     return start;
 709   };
 710 
 711 
 712   address generate_getCPUIDBrandString(void) {
 713     // Flags to test CPU type.
 714     const uint32_t HS_EFL_AC           = 0x40000;
 715     const uint32_t HS_EFL_ID           = 0x200000;
 716     // Values for when we don't have a CPUID instruction.
 717     const int      CPU_FAMILY_SHIFT = 8;
 718     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 719     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 720 
 721     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 722 
 723     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 724 #   define __ _masm->
 725 
 726     address start = __ pc();
 727 
 728     //
 729     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 730     //
 731     // rcx and rdx are first and second argument registers on windows
 732 
 733     __ push(rbp);
 734     __ mov(rbp, c_rarg0); // cpuid_info address
 735     __ push(rbx);
 736     __ push(rsi);
 737     __ pushf();          // preserve rbx, and flags
 738     __ pop(rax);
 739     __ push(rax);
 740     __ mov(rcx, rax);
 741     //
 742     // if we are unable to change the AC flag, we have a 386
 743     //
 744     __ xorl(rax, HS_EFL_AC);
 745     __ push(rax);
 746     __ popf();
 747     __ pushf();
 748     __ pop(rax);
 749     __ cmpptr(rax, rcx);
 750     __ jccb(Assembler::notEqual, detect_486);
 751 
 752     __ movl(rax, CPU_FAMILY_386);
 753     __ jmp(done);
 754 
 755     //
 756     // If we are unable to change the ID flag, we have a 486 which does
 757     // not support the "cpuid" instruction.
 758     //
 759     __ bind(detect_486);
 760     __ mov(rax, rcx);
 761     __ xorl(rax, HS_EFL_ID);
 762     __ push(rax);
 763     __ popf();
 764     __ pushf();
 765     __ pop(rax);
 766     __ cmpptr(rcx, rax);
 767     __ jccb(Assembler::notEqual, detect_586);
 768 
 769     __ bind(cpu486);
 770     __ movl(rax, CPU_FAMILY_486);
 771     __ jmp(done);
 772 
 773     //
 774     // At this point, we have a chip which supports the "cpuid" instruction
 775     //
 776     __ bind(detect_586);
 777     __ xorl(rax, rax);
 778     __ cpuid();
 779     __ orl(rax, rax);
 780     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 781                                         // value of at least 1, we give up and
 782                                         // assume a 486
 783 
 784     //
 785     // Extended cpuid(0x80000000) for processor brand string detection
 786     //
 787     __ bind(ext_cpuid);
 788     __ movl(rax, CPUID_EXTENDED_FN);
 789     __ cpuid();
 790     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 791     __ jcc(Assembler::below, done);
 792 
 793     //
 794     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 795     //
 796     __ movl(rax, CPUID_EXTENDED_FN_2);
 797     __ cpuid();
 798     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 799     __ movl(Address(rsi, 0), rax);
 800     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 801     __ movl(Address(rsi, 0), rbx);
 802     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 803     __ movl(Address(rsi, 0), rcx);
 804     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 805     __ movl(Address(rsi,0), rdx);
 806 
 807     //
 808     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 809     //
 810     __ movl(rax, CPUID_EXTENDED_FN_3);
 811     __ cpuid();
 812     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 813     __ movl(Address(rsi, 0), rax);
 814     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 815     __ movl(Address(rsi, 0), rbx);
 816     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 817     __ movl(Address(rsi, 0), rcx);
 818     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 819     __ movl(Address(rsi,0), rdx);
 820 
 821     //
 822     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 823     //
 824     __ movl(rax, CPUID_EXTENDED_FN_4);
 825     __ cpuid();
 826     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 827     __ movl(Address(rsi, 0), rax);
 828     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 829     __ movl(Address(rsi, 0), rbx);
 830     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 831     __ movl(Address(rsi, 0), rcx);
 832     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 833     __ movl(Address(rsi,0), rdx);
 834 
 835     //
 836     // return
 837     //
 838     __ bind(done);
 839     __ popf();
 840     __ pop(rsi);
 841     __ pop(rbx);
 842     __ pop(rbp);
 843     __ ret(0);
 844 
 845 #   undef __
 846 
 847     return start;
 848   };
 849 };
 850 
 851 void VM_Version::get_processor_features() {
 852 
 853   _cpu = 4; // 486 by default
 854   _model = 0;
 855   _stepping = 0;
 856   _logical_processors_per_package = 1;
 857   // i486 internal cache is both I&D and has a 16-byte line size
 858   _L1_data_cache_line_size = 16;
 859 
 860   // Get raw processor info
 861 
 862   get_cpu_info_stub(&_cpuid_info);
 863 
 864   assert_is_initialized();
 865   _cpu = extended_cpu_family();
 866   _model = extended_cpu_model();
 867   _stepping = cpu_stepping();
 868 
 869   if (cpu_family() > 4) { // it supports CPUID
 870     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 871     _cpu_features = _features; // Preserve features
 872     // Logical processors are only available on P4s and above,
 873     // and only if hyperthreading is available.
 874     _logical_processors_per_package = logical_processor_count();
 875     _L1_data_cache_line_size = L1_line_size();
 876   }
 877 
 878   // xchg and xadd instructions
 879   _supports_atomic_getset4 = true;
 880   _supports_atomic_getadd4 = true;
 881   _supports_atomic_getset8 = true;
 882   _supports_atomic_getadd8 = true;
 883 
 884   // OS should support SSE for x64 and hardware should support at least SSE2.
 885   if (!VM_Version::supports_sse2()) {
 886     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 887   }
 888   // in 64 bit the use of SSE2 is the minimum
 889   if (UseSSE < 2) UseSSE = 2;
 890 
 891   // flush_icache_stub have to be generated first.
 892   // That is why Icache line size is hard coded in ICache class,
 893   // see icache_x86.hpp. It is also the reason why we can't use
 894   // clflush instruction in 32-bit VM since it could be running
 895   // on CPU which does not support it.
 896   //
 897   // The only thing we can do is to verify that flushed
 898   // ICache::line_size has correct value.
 899   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 900   // clflush_size is size in quadwords (8 bytes).
 901   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 902 
 903   // assigning this field effectively enables Unsafe.writebackMemory()
 904   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 905   // that is only implemented on x86_64 and only if the OS plays ball
 906   if (os::supports_map_sync()) {
 907     // publish data cache line flush size to generic field, otherwise
 908     // let if default to zero thereby disabling writeback
 909     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 910   }
 911 
 912   // Check if processor has Intel Ecore
 913   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 914     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 915       _model == 0xCC || _model == 0xDD)) {
 916     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 917   }
 918 
 919   if (UseSSE < 4) {
 920     _features.clear_feature(CPU_SSE4_1);
 921     _features.clear_feature(CPU_SSE4_2);
 922   }
 923 
 924   if (UseSSE < 3) {
 925     _features.clear_feature(CPU_SSE3);
 926     _features.clear_feature(CPU_SSSE3);
 927     _features.clear_feature(CPU_SSE4A);
 928   }
 929 
 930   if (UseSSE < 2)
 931     _features.clear_feature(CPU_SSE2);
 932 
 933   if (UseSSE < 1)
 934     _features.clear_feature(CPU_SSE);
 935 
 936   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 937   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 938     UseAVX = 0;
 939   }
 940 
 941   // UseSSE is set to the smaller of what hardware supports and what
 942   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 943   // older Pentiums which do not support it.
 944   int use_sse_limit = 0;
 945   if (UseSSE > 0) {
 946     if (UseSSE > 3 && supports_sse4_1()) {
 947       use_sse_limit = 4;
 948     } else if (UseSSE > 2 && supports_sse3()) {
 949       use_sse_limit = 3;
 950     } else if (UseSSE > 1 && supports_sse2()) {
 951       use_sse_limit = 2;
 952     } else if (UseSSE > 0 && supports_sse()) {
 953       use_sse_limit = 1;
 954     } else {
 955       use_sse_limit = 0;
 956     }
 957   }
 958   if (FLAG_IS_DEFAULT(UseSSE)) {
 959     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 960   } else if (UseSSE > use_sse_limit) {
 961     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 962     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 963   }
 964 
 965   // first try initial setting and detect what we can support
 966   int use_avx_limit = 0;
 967   if (UseAVX > 0) {
 968     if (UseSSE < 4) {
 969       // Don't use AVX if SSE is unavailable or has been disabled.
 970       use_avx_limit = 0;
 971     } else if (UseAVX > 2 && supports_evex()) {
 972       use_avx_limit = 3;
 973     } else if (UseAVX > 1 && supports_avx2()) {
 974       use_avx_limit = 2;
 975     } else if (UseAVX > 0 && supports_avx()) {
 976       use_avx_limit = 1;
 977     } else {
 978       use_avx_limit = 0;
 979     }
 980   }
 981   if (FLAG_IS_DEFAULT(UseAVX)) {
 982     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 983     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 984       FLAG_SET_DEFAULT(UseAVX, 2);
 985     } else {
 986       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 987     }
 988   }
 989 
 990   if (UseAVX > use_avx_limit) {
 991     if (UseSSE < 4) {
 992       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 993     } else {
 994       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 995     }
 996     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 997   }
 998 
 999   if (UseAVX < 3) {
1000     _features.clear_feature(CPU_AVX512F);
1001     _features.clear_feature(CPU_AVX512DQ);
1002     _features.clear_feature(CPU_AVX512CD);
1003     _features.clear_feature(CPU_AVX512BW);
1004     _features.clear_feature(CPU_AVX512ER);
1005     _features.clear_feature(CPU_AVX512PF);
1006     _features.clear_feature(CPU_AVX512VL);
1007     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1008     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1009     _features.clear_feature(CPU_AVX512_VAES);
1010     _features.clear_feature(CPU_AVX512_VNNI);
1011     _features.clear_feature(CPU_AVX512_VBMI);
1012     _features.clear_feature(CPU_AVX512_VBMI2);
1013     _features.clear_feature(CPU_AVX512_BITALG);
1014     _features.clear_feature(CPU_AVX512_IFMA);
1015     _features.clear_feature(CPU_APX_F);
1016     _features.clear_feature(CPU_AVX512_FP16);
1017     _features.clear_feature(CPU_AVX10_1);
1018     _features.clear_feature(CPU_AVX10_2);
1019   }
1020 
1021   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1022   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1023   if (UseAPX && !apx_supported) {
1024     warning("UseAPX is not supported on this CPU, setting it to false");
1025     FLAG_SET_DEFAULT(UseAPX, false);
1026   } else if (FLAG_IS_DEFAULT(UseAPX)) {
1027     FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
1028   }
1029 
1030   if (!UseAPX) {
1031     _features.clear_feature(CPU_APX_F);
1032   }
1033 
1034   if (UseAVX < 2) {
1035     _features.clear_feature(CPU_AVX2);
1036     _features.clear_feature(CPU_AVX_IFMA);
1037   }
1038 
1039   if (UseAVX < 1) {
1040     _features.clear_feature(CPU_AVX);
1041     _features.clear_feature(CPU_VZEROUPPER);
1042     _features.clear_feature(CPU_F16C);
1043     _features.clear_feature(CPU_SHA512);
1044   }
1045 
1046   if (logical_processors_per_package() == 1) {
1047     // HT processor could be installed on a system which doesn't support HT.
1048     _features.clear_feature(CPU_HT);
1049   }
1050 
1051   if (is_intel()) { // Intel cpus specific settings
1052     if (is_knights_family()) {
1053       _features.clear_feature(CPU_VZEROUPPER);
1054       _features.clear_feature(CPU_AVX512BW);
1055       _features.clear_feature(CPU_AVX512VL);
1056       _features.clear_feature(CPU_AVX512DQ);
1057       _features.clear_feature(CPU_AVX512_VNNI);
1058       _features.clear_feature(CPU_AVX512_VAES);
1059       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1060       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1061       _features.clear_feature(CPU_AVX512_VBMI);
1062       _features.clear_feature(CPU_AVX512_VBMI2);
1063       _features.clear_feature(CPU_CLWB);
1064       _features.clear_feature(CPU_FLUSHOPT);
1065       _features.clear_feature(CPU_GFNI);
1066       _features.clear_feature(CPU_AVX512_BITALG);
1067       _features.clear_feature(CPU_AVX512_IFMA);
1068       _features.clear_feature(CPU_AVX_IFMA);
1069       _features.clear_feature(CPU_AVX512_FP16);
1070       _features.clear_feature(CPU_AVX10_1);
1071       _features.clear_feature(CPU_AVX10_2);
1072     }
1073   }
1074 
1075   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1076     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1077   } else {
1078     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1079   }
1080 
1081   assert(supports_clflush(), "Always present");
1082   if (X86ICacheSync == -1) {
1083     // Auto-detect, choosing the best performant one that still flushes
1084     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1085     if (supports_clwb()) {
1086       FLAG_SET_ERGO(X86ICacheSync, 3);
1087     } else if (supports_clflushopt()) {
1088       FLAG_SET_ERGO(X86ICacheSync, 2);
1089     } else {
1090       FLAG_SET_ERGO(X86ICacheSync, 1);
1091     }
1092   } else {
1093     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1094       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1095     }
1096     if ((X86ICacheSync == 3) && !supports_clwb()) {
1097       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1098     }
1099     if ((X86ICacheSync == 5) && !supports_serialize()) {
1100       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1101     }
1102   }
1103 
1104   stringStream ss(2048);
1105   ss.print("(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1106            cores_per_cpu(), threads_per_core(),
1107            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1108   ss.print(", ");
1109   int features_offset = (int)ss.size();
1110   insert_features_names(_features, ss);
1111 
1112   _cpu_info_string = ss.as_string(true);
1113   _features_string = _cpu_info_string + features_offset;
1114 
1115   // Use AES instructions if available.
1116   if (supports_aes()) {
1117     if (FLAG_IS_DEFAULT(UseAES)) {
1118       FLAG_SET_DEFAULT(UseAES, true);
1119     }
1120     if (!UseAES) {
1121       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1122         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1123       }
1124       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1125     } else {
1126       if (UseSSE > 2) {
1127         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1128           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1129         }
1130       } else {
1131         // The AES intrinsic stubs require AES instruction support (of course)
1132         // but also require sse3 mode or higher for instructions it use.
1133         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1134           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1135         }
1136         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1137       }
1138 
1139       // --AES-CTR begins--
1140       if (!UseAESIntrinsics) {
1141         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1142           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1143           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1144         }
1145       } else {
1146         if (supports_sse4_1()) {
1147           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1148             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1149           }
1150         } else {
1151            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1152            // but also require sse4.1 mode or higher for instructions it use.
1153           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1154              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1155            }
1156            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1157         }
1158       }
1159       // --AES-CTR ends--
1160     }
1161   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1162     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1163       warning("AES instructions are not available on this CPU");
1164       FLAG_SET_DEFAULT(UseAES, false);
1165     }
1166     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1167       warning("AES intrinsics are not available on this CPU");
1168       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1169     }
1170     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1171       warning("AES-CTR intrinsics are not available on this CPU");
1172       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1173     }
1174   }
1175 
1176   // Use CLMUL instructions if available.
1177   if (supports_clmul()) {
1178     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1179       UseCLMUL = true;
1180     }
1181   } else if (UseCLMUL) {
1182     if (!FLAG_IS_DEFAULT(UseCLMUL))
1183       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1184     FLAG_SET_DEFAULT(UseCLMUL, false);
1185   }
1186 
1187   if (UseCLMUL && (UseSSE > 2)) {
1188     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1189       UseCRC32Intrinsics = true;
1190     }
1191   } else if (UseCRC32Intrinsics) {
1192     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1193       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1194     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1195   }
1196 
1197   if (supports_avx2()) {
1198     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1199       UseAdler32Intrinsics = true;
1200     }
1201   } else if (UseAdler32Intrinsics) {
1202     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1203       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1204     }
1205     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1206   }
1207 
1208   if (supports_sse4_2() && supports_clmul()) {
1209     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1210       UseCRC32CIntrinsics = true;
1211     }
1212   } else if (UseCRC32CIntrinsics) {
1213     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1214       warning("CRC32C intrinsics are not available on this CPU");
1215     }
1216     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1217   }
1218 
1219   // GHASH/GCM intrinsics
1220   if (UseCLMUL && (UseSSE > 2)) {
1221     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1222       UseGHASHIntrinsics = true;
1223     }
1224   } else if (UseGHASHIntrinsics) {
1225     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1226       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1227     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1228   }
1229 
1230   // ChaCha20 Intrinsics
1231   // As long as the system supports AVX as a baseline we can do a
1232   // SIMD-enabled block function.  StubGenerator makes the determination
1233   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1234   // version.
1235   if (UseAVX >= 1) {
1236       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1237           UseChaCha20Intrinsics = true;
1238       }
1239   } else if (UseChaCha20Intrinsics) {
1240       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1241           warning("ChaCha20 intrinsic requires AVX instructions");
1242       }
1243       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1244   }
1245 
1246   // Kyber Intrinsics
1247   // Currently we only have them for AVX512
1248 #ifdef _LP64
1249   if (supports_evex() && supports_avx512bw()) {
1250       if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1251           UseKyberIntrinsics = true;
1252       }
1253   } else
1254 #endif
1255   if (UseKyberIntrinsics) {
1256      warning("Intrinsics for ML-KEM are not available on this CPU.");
1257      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1258   }
1259 
1260   // Dilithium Intrinsics
1261   // Currently we only have them for AVX512
1262   if (supports_evex() && supports_avx512bw()) {
1263       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1264           UseDilithiumIntrinsics = true;
1265       }
1266   } else if (UseDilithiumIntrinsics) {
1267       warning("Intrinsics for ML-DSA are not available on this CPU.");
1268       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1269   }
1270 
1271   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1272   if (UseAVX >= 2) {
1273     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1274       UseBASE64Intrinsics = true;
1275     }
1276   } else if (UseBASE64Intrinsics) {
1277      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1278       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1279     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1280   }
1281 
1282   if (supports_fma()) {
1283     if (FLAG_IS_DEFAULT(UseFMA)) {
1284       UseFMA = true;
1285     }
1286   } else if (UseFMA) {
1287     warning("FMA instructions are not available on this CPU");
1288     FLAG_SET_DEFAULT(UseFMA, false);
1289   }
1290 
1291   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1292     UseMD5Intrinsics = true;
1293   }
1294 
1295   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1296     if (FLAG_IS_DEFAULT(UseSHA)) {
1297       UseSHA = true;
1298     }
1299   } else if (UseSHA) {
1300     warning("SHA instructions are not available on this CPU");
1301     FLAG_SET_DEFAULT(UseSHA, false);
1302   }
1303 
1304   if (supports_sha() && supports_sse4_1() && UseSHA) {
1305     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1306       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1307     }
1308   } else if (UseSHA1Intrinsics) {
1309     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1310     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1311   }
1312 
1313   if (supports_sse4_1() && UseSHA) {
1314     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1315       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1316     }
1317   } else if (UseSHA256Intrinsics) {
1318     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1319     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1320   }
1321 
1322   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1323     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1324       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1325     }
1326   } else if (UseSHA512Intrinsics) {
1327     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1328     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1329   }
1330 
1331   if (supports_evex() && supports_avx512bw()) {
1332       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1333           UseSHA3Intrinsics = true;
1334       }
1335   } else if (UseSHA3Intrinsics) {
1336       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1337       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1338   }
1339 
1340   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1341     FLAG_SET_DEFAULT(UseSHA, false);
1342   }
1343 
1344 #if COMPILER2_OR_JVMCI
1345   int max_vector_size = 0;
1346   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1347     // 16 byte vectors (in XMM) are supported with SSE2+
1348     max_vector_size = 16;
1349   } else if (UseAVX == 1 || UseAVX == 2) {
1350     // 32 bytes vectors (in YMM) are only supported with AVX+
1351     max_vector_size = 32;
1352   } else if (UseAVX > 2) {
1353     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1354     max_vector_size = 64;
1355   }
1356 
1357   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1358 
1359   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1360     if (MaxVectorSize < min_vector_size) {
1361       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1362       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1363     }
1364     if (MaxVectorSize > max_vector_size) {
1365       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1366       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1367     }
1368     if (!is_power_of_2(MaxVectorSize)) {
1369       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1370       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1371     }
1372   } else {
1373     // If default, use highest supported configuration
1374     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1375   }
1376 
1377 #if defined(COMPILER2) && defined(ASSERT)
1378   if (MaxVectorSize > 0) {
1379     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1380       tty->print_cr("State of YMM registers after signal handle:");
1381       int nreg = 4;
1382       const char* ymm_name[4] = {"0", "7", "8", "15"};
1383       for (int i = 0; i < nreg; i++) {
1384         tty->print("YMM%s:", ymm_name[i]);
1385         for (int j = 7; j >=0; j--) {
1386           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1387         }
1388         tty->cr();
1389       }
1390     }
1391   }
1392 #endif // COMPILER2 && ASSERT
1393 
1394   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1395     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1396       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1397     }
1398   } else if (UsePoly1305Intrinsics) {
1399     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1400     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1401   }
1402 
1403   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1404     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1405       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1406     }
1407   } else if (UseIntPolyIntrinsics) {
1408     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1409     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1410   }
1411 
1412   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1413     UseMultiplyToLenIntrinsic = true;
1414   }
1415   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1416     UseSquareToLenIntrinsic = true;
1417   }
1418   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1419     UseMulAddIntrinsic = true;
1420   }
1421   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1422     UseMontgomeryMultiplyIntrinsic = true;
1423   }
1424   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1425     UseMontgomerySquareIntrinsic = true;
1426   }
1427 #endif // COMPILER2_OR_JVMCI
1428 
1429   // On new cpus instructions which update whole XMM register should be used
1430   // to prevent partial register stall due to dependencies on high half.
1431   //
1432   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1433   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1434   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1435   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1436 
1437 
1438   if (is_zx()) { // ZX cpus specific settings
1439     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1440       UseStoreImmI16 = false; // don't use it on ZX cpus
1441     }
1442     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1443       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1444         // Use it on all ZX cpus
1445         UseAddressNop = true;
1446       }
1447     }
1448     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1449       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1450     }
1451     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1452       if (supports_sse3()) {
1453         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1454       } else {
1455         UseXmmRegToRegMoveAll = false;
1456       }
1457     }
1458     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1459 #ifdef COMPILER2
1460       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1461         // For new ZX cpus do the next optimization:
1462         // don't align the beginning of a loop if there are enough instructions
1463         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1464         // in current fetch line (OptoLoopAlignment) or the padding
1465         // is big (> MaxLoopPad).
1466         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1467         // generated NOP instructions. 11 is the largest size of one
1468         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1469         MaxLoopPad = 11;
1470       }
1471 #endif // COMPILER2
1472       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1473         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1474       }
1475       if (supports_sse4_2()) { // new ZX cpus
1476         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1477           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1478         }
1479       }
1480       if (supports_sse4_2()) {
1481         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1482           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1483         }
1484       } else {
1485         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1486           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1487         }
1488         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1489       }
1490     }
1491 
1492     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1493       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1494     }
1495   }
1496 
1497   if (is_amd_family()) { // AMD cpus specific settings
1498     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1499       // Use it on new AMD cpus starting from Opteron.
1500       UseAddressNop = true;
1501     }
1502     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1503       // Use it on new AMD cpus starting from Opteron.
1504       UseNewLongLShift = true;
1505     }
1506     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1507       if (supports_sse4a()) {
1508         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1509       } else {
1510         UseXmmLoadAndClearUpper = false;
1511       }
1512     }
1513     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1514       if (supports_sse4a()) {
1515         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1516       } else {
1517         UseXmmRegToRegMoveAll = false;
1518       }
1519     }
1520     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1521       if (supports_sse4a()) {
1522         UseXmmI2F = true;
1523       } else {
1524         UseXmmI2F = false;
1525       }
1526     }
1527     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1528       if (supports_sse4a()) {
1529         UseXmmI2D = true;
1530       } else {
1531         UseXmmI2D = false;
1532       }
1533     }
1534     if (supports_sse4_2()) {
1535       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1536         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1537       }
1538     } else {
1539       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1540         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1541       }
1542       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1543     }
1544 
1545     // some defaults for AMD family 15h
1546     if (cpu_family() == 0x15) {
1547       // On family 15h processors default is no sw prefetch
1548       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1549         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1550       }
1551       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1552       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1553         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1554       }
1555       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1556       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1557         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1558       }
1559       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1560         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1561       }
1562     }
1563 
1564 #ifdef COMPILER2
1565     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1566       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1567       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1568     }
1569 #endif // COMPILER2
1570 
1571     // Some defaults for AMD family >= 17h && Hygon family 18h
1572     if (cpu_family() >= 0x17) {
1573       // On family >=17h processors use XMM and UnalignedLoadStores
1574       // for Array Copy
1575       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1576         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1577       }
1578       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1579         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1580       }
1581 #ifdef COMPILER2
1582       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1583         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1584       }
1585 #endif
1586     }
1587   }
1588 
1589   if (is_intel()) { // Intel cpus specific settings
1590     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1591       UseStoreImmI16 = false; // don't use it on Intel cpus
1592     }
1593     if (cpu_family() == 6 || cpu_family() == 15) {
1594       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1595         // Use it on all Intel cpus starting from PentiumPro
1596         UseAddressNop = true;
1597       }
1598     }
1599     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1600       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1601     }
1602     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1603       if (supports_sse3()) {
1604         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1605       } else {
1606         UseXmmRegToRegMoveAll = false;
1607       }
1608     }
1609     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1610 #ifdef COMPILER2
1611       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1612         // For new Intel cpus do the next optimization:
1613         // don't align the beginning of a loop if there are enough instructions
1614         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1615         // in current fetch line (OptoLoopAlignment) or the padding
1616         // is big (> MaxLoopPad).
1617         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1618         // generated NOP instructions. 11 is the largest size of one
1619         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1620         MaxLoopPad = 11;
1621       }
1622 #endif // COMPILER2
1623 
1624       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1625         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1626       }
1627       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1628         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1629           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1630         }
1631       }
1632       if (supports_sse4_2()) {
1633         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1634           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1635         }
1636       } else {
1637         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1638           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1639         }
1640         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1641       }
1642     }
1643     if (is_atom_family() || is_knights_family()) {
1644 #ifdef COMPILER2
1645       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1646         OptoScheduling = true;
1647       }
1648 #endif
1649       if (supports_sse4_2()) { // Silvermont
1650         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1651           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1652         }
1653       }
1654       if (FLAG_IS_DEFAULT(UseIncDec)) {
1655         FLAG_SET_DEFAULT(UseIncDec, false);
1656       }
1657     }
1658     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1659       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1660     }
1661 #ifdef COMPILER2
1662     if (UseAVX > 2) {
1663       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1664           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1665            ArrayOperationPartialInlineSize != 0 &&
1666            ArrayOperationPartialInlineSize != 16 &&
1667            ArrayOperationPartialInlineSize != 32 &&
1668            ArrayOperationPartialInlineSize != 64)) {
1669         int inline_size = 0;
1670         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1671           inline_size = 64;
1672         } else if (MaxVectorSize >= 32) {
1673           inline_size = 32;
1674         } else if (MaxVectorSize >= 16) {
1675           inline_size = 16;
1676         }
1677         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1678           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1679         }
1680         ArrayOperationPartialInlineSize = inline_size;
1681       }
1682 
1683       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1684         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1685         if (ArrayOperationPartialInlineSize) {
1686           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1687         } else {
1688           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1689         }
1690       }
1691     }
1692 #endif
1693   }
1694 
1695 #ifdef COMPILER2
1696   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1697     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1698       OptimizeFill = false;
1699     }
1700   }
1701 #endif
1702 
1703   if (UseSSE42Intrinsics) {
1704     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1705       UseVectorizedMismatchIntrinsic = true;
1706     }
1707   } else if (UseVectorizedMismatchIntrinsic) {
1708     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1709       warning("vectorizedMismatch intrinsics are not available on this CPU");
1710     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1711   }
1712   if (UseAVX >= 2) {
1713     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1714   } else if (UseVectorizedHashCodeIntrinsic) {
1715     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1716       warning("vectorizedHashCode intrinsics are not available on this CPU");
1717     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1718   }
1719 
1720   // Use count leading zeros count instruction if available.
1721   if (supports_lzcnt()) {
1722     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1723       UseCountLeadingZerosInstruction = true;
1724     }
1725    } else if (UseCountLeadingZerosInstruction) {
1726     warning("lzcnt instruction is not available on this CPU");
1727     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1728   }
1729 
1730   // Use count trailing zeros instruction if available
1731   if (supports_bmi1()) {
1732     // tzcnt does not require VEX prefix
1733     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1734       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1735         // Don't use tzcnt if BMI1 is switched off on command line.
1736         UseCountTrailingZerosInstruction = false;
1737       } else {
1738         UseCountTrailingZerosInstruction = true;
1739       }
1740     }
1741   } else if (UseCountTrailingZerosInstruction) {
1742     warning("tzcnt instruction is not available on this CPU");
1743     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1744   }
1745 
1746   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1747   // VEX prefix is generated only when AVX > 0.
1748   if (supports_bmi1() && supports_avx()) {
1749     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1750       UseBMI1Instructions = true;
1751     }
1752   } else if (UseBMI1Instructions) {
1753     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1754     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1755   }
1756 
1757   if (supports_bmi2() && supports_avx()) {
1758     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1759       UseBMI2Instructions = true;
1760     }
1761   } else if (UseBMI2Instructions) {
1762     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1763     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1764   }
1765 
1766   // Use population count instruction if available.
1767   if (supports_popcnt()) {
1768     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1769       UsePopCountInstruction = true;
1770     }
1771   } else if (UsePopCountInstruction) {
1772     warning("POPCNT instruction is not available on this CPU");
1773     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1774   }
1775 
1776   // Use fast-string operations if available.
1777   if (supports_erms()) {
1778     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1779       UseFastStosb = true;
1780     }
1781   } else if (UseFastStosb) {
1782     warning("fast-string operations are not available on this CPU");
1783     FLAG_SET_DEFAULT(UseFastStosb, false);
1784   }
1785 
1786   // For AMD Processors use XMM/YMM MOVDQU instructions
1787   // for Object Initialization as default
1788   if (is_amd() && cpu_family() >= 0x19) {
1789     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1790       UseFastStosb = false;
1791     }
1792   }
1793 
1794 #ifdef COMPILER2
1795   if (is_intel() && MaxVectorSize > 16) {
1796     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1797       UseFastStosb = false;
1798     }
1799   }
1800 #endif
1801 
1802   // Use XMM/YMM MOVDQU instruction for Object Initialization
1803   if (!UseFastStosb && UseUnalignedLoadStores) {
1804     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1805       UseXMMForObjInit = true;
1806     }
1807   } else if (UseXMMForObjInit) {
1808     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1809     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1810   }
1811 
1812 #ifdef COMPILER2
1813   if (FLAG_IS_DEFAULT(AlignVector)) {
1814     // Modern processors allow misaligned memory operations for vectors.
1815     AlignVector = !UseUnalignedLoadStores;
1816   }
1817 #endif // COMPILER2
1818 
1819   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1820     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1821       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1822     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1823       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1824     }
1825   }
1826 
1827   // Allocation prefetch settings
1828   int cache_line_size = checked_cast<int>(prefetch_data_size());
1829   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1830       (cache_line_size > AllocatePrefetchStepSize)) {
1831     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1832   }
1833 
1834   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1835     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1836     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1837       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1838     }
1839     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1840   }
1841 
1842   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1843     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1844     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1845   }
1846 
1847   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1848     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1849         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1850       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1851     }
1852 #ifdef COMPILER2
1853     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1854       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1855     }
1856 #endif
1857   }
1858 
1859   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1860 #ifdef COMPILER2
1861     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1862       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1863     }
1864 #endif
1865   }
1866 
1867   // Prefetch settings
1868 
1869   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1870   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1871   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1872   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1873 
1874   // gc copy/scan is disabled if prefetchw isn't supported, because
1875   // Prefetch::write emits an inlined prefetchw on Linux.
1876   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1877   // The used prefetcht0 instruction works for both amd64 and em64t.
1878 
1879   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1880     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1881   }
1882   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1883     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1884   }
1885 
1886   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1887      (cache_line_size > ContendedPaddingWidth))
1888      ContendedPaddingWidth = cache_line_size;
1889 
1890   // This machine allows unaligned memory accesses
1891   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1892     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1893   }
1894 
1895 #ifndef PRODUCT
1896   if (log_is_enabled(Info, os, cpu)) {
1897     LogStream ls(Log(os, cpu)::info());
1898     outputStream* log = &ls;
1899     log->print_cr("Logical CPUs per core: %u",
1900                   logical_processors_per_package());
1901     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1902     log->print("UseSSE=%d", UseSSE);
1903     if (UseAVX > 0) {
1904       log->print("  UseAVX=%d", UseAVX);
1905     }
1906     if (UseAES) {
1907       log->print("  UseAES=1");
1908     }
1909 #ifdef COMPILER2
1910     if (MaxVectorSize > 0) {
1911       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1912     }
1913 #endif
1914     log->cr();
1915     log->print("Allocation");
1916     if (AllocatePrefetchStyle <= 0) {
1917       log->print_cr(": no prefetching");
1918     } else {
1919       log->print(" prefetching: ");
1920       if (AllocatePrefetchInstr == 0) {
1921         log->print("PREFETCHNTA");
1922       } else if (AllocatePrefetchInstr == 1) {
1923         log->print("PREFETCHT0");
1924       } else if (AllocatePrefetchInstr == 2) {
1925         log->print("PREFETCHT2");
1926       } else if (AllocatePrefetchInstr == 3) {
1927         log->print("PREFETCHW");
1928       }
1929       if (AllocatePrefetchLines > 1) {
1930         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1931       } else {
1932         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1933       }
1934     }
1935 
1936     if (PrefetchCopyIntervalInBytes > 0) {
1937       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1938     }
1939     if (PrefetchScanIntervalInBytes > 0) {
1940       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1941     }
1942     if (ContendedPaddingWidth > 0) {
1943       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1944     }
1945   }
1946 #endif // !PRODUCT
1947   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1948       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1949   }
1950   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1951       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1952   }
1953 }
1954 
1955 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1956   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1957   if (vrt == XenHVM) {
1958     st->print_cr("Xen hardware-assisted virtualization detected");
1959   } else if (vrt == KVM) {
1960     st->print_cr("KVM virtualization detected");
1961   } else if (vrt == VMWare) {
1962     st->print_cr("VMWare virtualization detected");
1963     VirtualizationSupport::print_virtualization_info(st);
1964   } else if (vrt == HyperV) {
1965     st->print_cr("Hyper-V virtualization detected");
1966   } else if (vrt == HyperVRole) {
1967     st->print_cr("Hyper-V role detected");
1968   }
1969 }
1970 
1971 bool VM_Version::compute_has_intel_jcc_erratum() {
1972   if (!is_intel_family_core()) {
1973     // Only Intel CPUs are affected.
1974     return false;
1975   }
1976   // The following table of affected CPUs is based on the following document released by Intel:
1977   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1978   switch (_model) {
1979   case 0x8E:
1980     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1981     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1982     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1983     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1984     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1985     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1986     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1987     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1988     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1989     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1990   case 0x4E:
1991     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1992     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1993     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1994     return _stepping == 0x3;
1995   case 0x55:
1996     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1997     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1998     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1999     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2000     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2001     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2002     return _stepping == 0x4 || _stepping == 0x7;
2003   case 0x5E:
2004     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2005     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2006     return _stepping == 0x3;
2007   case 0x9E:
2008     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2009     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2010     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2011     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2012     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2013     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2014     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2015     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2016     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2017     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2018     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2019     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2020     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2021     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2022     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2023   case 0xA5:
2024     // Not in Intel documentation.
2025     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2026     return true;
2027   case 0xA6:
2028     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2029     return _stepping == 0x0;
2030   case 0xAE:
2031     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2032     return _stepping == 0xA;
2033   default:
2034     // If we are running on another intel machine not recognized in the table, we are okay.
2035     return false;
2036   }
2037 }
2038 
2039 // On Xen, the cpuid instruction returns
2040 //  eax / registers[0]: Version of Xen
2041 //  ebx / registers[1]: chars 'XenV'
2042 //  ecx / registers[2]: chars 'MMXe'
2043 //  edx / registers[3]: chars 'nVMM'
2044 //
2045 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2046 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2047 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2048 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2049 //
2050 // more information :
2051 // https://kb.vmware.com/s/article/1009458
2052 //
2053 void VM_Version::check_virtualizations() {
2054   uint32_t registers[4] = {0};
2055   char signature[13] = {0};
2056 
2057   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2058   // from 0x40000000 until 0x40010000.
2059   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2060   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2061     detect_virt_stub(leaf, registers);
2062     memcpy(signature, &registers[1], 12);
2063 
2064     if (strncmp("VMwareVMware", signature, 12) == 0) {
2065       Abstract_VM_Version::_detected_virtualization = VMWare;
2066       // check for extended metrics from guestlib
2067       VirtualizationSupport::initialize();
2068     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2069       Abstract_VM_Version::_detected_virtualization = HyperV;
2070 #ifdef _WINDOWS
2071       // CPUID leaf 0x40000007 is available to the root partition only.
2072       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2073       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2074       detect_virt_stub(0x40000007, registers);
2075       if ((registers[0] != 0x0) ||
2076           (registers[1] != 0x0) ||
2077           (registers[2] != 0x0) ||
2078           (registers[3] != 0x0)) {
2079         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2080       }
2081 #endif
2082     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2083       Abstract_VM_Version::_detected_virtualization = KVM;
2084     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2085       Abstract_VM_Version::_detected_virtualization = XenHVM;
2086     }
2087   }
2088 }
2089 
2090 #ifdef COMPILER2
2091 // Determine if it's running on Cascade Lake using default options.
2092 bool VM_Version::is_default_intel_cascade_lake() {
2093   return FLAG_IS_DEFAULT(UseAVX) &&
2094          FLAG_IS_DEFAULT(MaxVectorSize) &&
2095          UseAVX > 2 &&
2096          is_intel_cascade_lake();
2097 }
2098 #endif
2099 
2100 bool VM_Version::is_intel_cascade_lake() {
2101   return is_intel_skylake() && _stepping >= 5;
2102 }
2103 
2104 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2105 // for implementing the array copy and clear operations.
2106 // The Intel platforms that supports the serialize instruction
2107 // has improved implementation of 64-byte load/stores and so the default
2108 // threshold is set to 0 for these platforms.
2109 int VM_Version::avx3_threshold() {
2110   return (is_intel_family_core() &&
2111           supports_serialize() &&
2112           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2113 }
2114 
2115 void VM_Version::clear_apx_test_state() {
2116   clear_apx_test_state_stub();
2117 }
2118 
2119 static bool _vm_version_initialized = false;
2120 
2121 void VM_Version::initialize() {
2122   ResourceMark rm;
2123 
2124   // Making this stub must be FIRST use of assembler
2125   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2126   if (stub_blob == nullptr) {
2127     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2128   }
2129   CodeBuffer c(stub_blob);
2130   VM_Version_StubGenerator g(&c);
2131 
2132   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2133                                      g.generate_get_cpu_info());
2134   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2135                                      g.generate_detect_virt());
2136   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2137                                      g.clear_apx_test_state());
2138   get_processor_features();
2139 
2140   Assembler::precompute_instructions();
2141 
2142   if (VM_Version::supports_hv()) { // Supports hypervisor
2143     check_virtualizations();
2144   }
2145   _vm_version_initialized = true;
2146 }
2147 
2148 typedef enum {
2149    CPU_FAMILY_8086_8088  = 0,
2150    CPU_FAMILY_INTEL_286  = 2,
2151    CPU_FAMILY_INTEL_386  = 3,
2152    CPU_FAMILY_INTEL_486  = 4,
2153    CPU_FAMILY_PENTIUM    = 5,
2154    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2155    CPU_FAMILY_PENTIUM_4  = 0xF
2156 } FamilyFlag;
2157 
2158 typedef enum {
2159   RDTSCP_FLAG  = 0x08000000, // bit 27
2160   INTEL64_FLAG = 0x20000000  // bit 29
2161 } _featureExtendedEdxFlag;
2162 
2163 typedef enum {
2164    FPU_FLAG     = 0x00000001,
2165    VME_FLAG     = 0x00000002,
2166    DE_FLAG      = 0x00000004,
2167    PSE_FLAG     = 0x00000008,
2168    TSC_FLAG     = 0x00000010,
2169    MSR_FLAG     = 0x00000020,
2170    PAE_FLAG     = 0x00000040,
2171    MCE_FLAG     = 0x00000080,
2172    CX8_FLAG     = 0x00000100,
2173    APIC_FLAG    = 0x00000200,
2174    SEP_FLAG     = 0x00000800,
2175    MTRR_FLAG    = 0x00001000,
2176    PGE_FLAG     = 0x00002000,
2177    MCA_FLAG     = 0x00004000,
2178    CMOV_FLAG    = 0x00008000,
2179    PAT_FLAG     = 0x00010000,
2180    PSE36_FLAG   = 0x00020000,
2181    PSNUM_FLAG   = 0x00040000,
2182    CLFLUSH_FLAG = 0x00080000,
2183    DTS_FLAG     = 0x00200000,
2184    ACPI_FLAG    = 0x00400000,
2185    MMX_FLAG     = 0x00800000,
2186    FXSR_FLAG    = 0x01000000,
2187    SSE_FLAG     = 0x02000000,
2188    SSE2_FLAG    = 0x04000000,
2189    SS_FLAG      = 0x08000000,
2190    HTT_FLAG     = 0x10000000,
2191    TM_FLAG      = 0x20000000
2192 } FeatureEdxFlag;
2193 
2194 static BufferBlob* cpuid_brand_string_stub_blob;
2195 static const int   cpuid_brand_string_stub_size = 550;
2196 
2197 extern "C" {
2198   typedef void (*getCPUIDBrandString_stub_t)(void*);
2199 }
2200 
2201 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2202 
2203 // VM_Version statics
2204 enum {
2205   ExtendedFamilyIdLength_INTEL = 16,
2206   ExtendedFamilyIdLength_AMD   = 24
2207 };
2208 
2209 const size_t VENDOR_LENGTH = 13;
2210 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2211 static char* _cpu_brand_string = nullptr;
2212 static int64_t _max_qualified_cpu_frequency = 0;
2213 
2214 static int _no_of_threads = 0;
2215 static int _no_of_cores = 0;
2216 
2217 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2218   "8086/8088",
2219   "",
2220   "286",
2221   "386",
2222   "486",
2223   "Pentium",
2224   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2225   "",
2226   "",
2227   "",
2228   "",
2229   "",
2230   "",
2231   "",
2232   "",
2233   "Pentium 4"
2234 };
2235 
2236 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2237   "",
2238   "",
2239   "",
2240   "",
2241   "5x86",
2242   "K5/K6",
2243   "Athlon/AthlonXP",
2244   "",
2245   "",
2246   "",
2247   "",
2248   "",
2249   "",
2250   "",
2251   "",
2252   "Opteron/Athlon64",
2253   "Opteron QC/Phenom",  // Barcelona et.al.
2254   "",
2255   "",
2256   "",
2257   "",
2258   "",
2259   "",
2260   "Zen"
2261 };
2262 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2263 // September 2013, Vol 3C Table 35-1
2264 const char* const _model_id_pentium_pro[] = {
2265   "",
2266   "Pentium Pro",
2267   "",
2268   "Pentium II model 3",
2269   "",
2270   "Pentium II model 5/Xeon/Celeron",
2271   "Celeron",
2272   "Pentium III/Pentium III Xeon",
2273   "Pentium III/Pentium III Xeon",
2274   "Pentium M model 9",    // Yonah
2275   "Pentium III, model A",
2276   "Pentium III, model B",
2277   "",
2278   "Pentium M model D",    // Dothan
2279   "",
2280   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2281   "",
2282   "",
2283   "",
2284   "",
2285   "",
2286   "",
2287   "Celeron",              // 0x16 Celeron 65nm
2288   "Core 2",               // 0x17 Penryn / Harpertown
2289   "",
2290   "",
2291   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2292   "Atom",                 // 0x1B Z5xx series Silverthorn
2293   "",
2294   "Core 2",               // 0x1D Dunnington (6-core)
2295   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2296   "",
2297   "",
2298   "",
2299   "",
2300   "",
2301   "",
2302   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2303   "",
2304   "",
2305   "",                     // 0x28
2306   "",
2307   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2308   "",
2309   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2310   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2311   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2312   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2313   "",
2314   "",
2315   "",
2316   "",
2317   "",
2318   "",
2319   "",
2320   "",
2321   "",
2322   "",
2323   "Ivy Bridge",           // 0x3a
2324   "",
2325   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2326   "",                     // 0x3d "Next Generation Intel Core Processor"
2327   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2328   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2329   "",
2330   "",
2331   "",
2332   "",
2333   "",
2334   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2335   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2336   nullptr
2337 };
2338 
2339 /* Brand ID is for back compatibility
2340  * Newer CPUs uses the extended brand string */
2341 const char* const _brand_id[] = {
2342   "",
2343   "Celeron processor",
2344   "Pentium III processor",
2345   "Intel Pentium III Xeon processor",
2346   "",
2347   "",
2348   "",
2349   "",
2350   "Intel Pentium 4 processor",
2351   nullptr
2352 };
2353 
2354 
2355 const char* const _feature_edx_id[] = {
2356   "On-Chip FPU",
2357   "Virtual Mode Extensions",
2358   "Debugging Extensions",
2359   "Page Size Extensions",
2360   "Time Stamp Counter",
2361   "Model Specific Registers",
2362   "Physical Address Extension",
2363   "Machine Check Exceptions",
2364   "CMPXCHG8B Instruction",
2365   "On-Chip APIC",
2366   "",
2367   "Fast System Call",
2368   "Memory Type Range Registers",
2369   "Page Global Enable",
2370   "Machine Check Architecture",
2371   "Conditional Mov Instruction",
2372   "Page Attribute Table",
2373   "36-bit Page Size Extension",
2374   "Processor Serial Number",
2375   "CLFLUSH Instruction",
2376   "",
2377   "Debug Trace Store feature",
2378   "ACPI registers in MSR space",
2379   "Intel Architecture MMX Technology",
2380   "Fast Float Point Save and Restore",
2381   "Streaming SIMD extensions",
2382   "Streaming SIMD extensions 2",
2383   "Self-Snoop",
2384   "Hyper Threading",
2385   "Thermal Monitor",
2386   "",
2387   "Pending Break Enable"
2388 };
2389 
2390 const char* const _feature_extended_edx_id[] = {
2391   "",
2392   "",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "",
2399   "",
2400   "",
2401   "",
2402   "SYSCALL/SYSRET",
2403   "",
2404   "",
2405   "",
2406   "",
2407   "",
2408   "",
2409   "",
2410   "",
2411   "Execute Disable Bit",
2412   "",
2413   "",
2414   "",
2415   "",
2416   "",
2417   "",
2418   "RDTSCP",
2419   "",
2420   "Intel 64 Architecture",
2421   "",
2422   ""
2423 };
2424 
2425 const char* const _feature_ecx_id[] = {
2426   "Streaming SIMD Extensions 3",
2427   "PCLMULQDQ",
2428   "64-bit DS Area",
2429   "MONITOR/MWAIT instructions",
2430   "CPL Qualified Debug Store",
2431   "Virtual Machine Extensions",
2432   "Safer Mode Extensions",
2433   "Enhanced Intel SpeedStep technology",
2434   "Thermal Monitor 2",
2435   "Supplemental Streaming SIMD Extensions 3",
2436   "L1 Context ID",
2437   "",
2438   "Fused Multiply-Add",
2439   "CMPXCHG16B",
2440   "xTPR Update Control",
2441   "Perfmon and Debug Capability",
2442   "",
2443   "Process-context identifiers",
2444   "Direct Cache Access",
2445   "Streaming SIMD extensions 4.1",
2446   "Streaming SIMD extensions 4.2",
2447   "x2APIC",
2448   "MOVBE",
2449   "Popcount instruction",
2450   "TSC-Deadline",
2451   "AESNI",
2452   "XSAVE",
2453   "OSXSAVE",
2454   "AVX",
2455   "F16C",
2456   "RDRAND",
2457   ""
2458 };
2459 
2460 const char* const _feature_extended_ecx_id[] = {
2461   "LAHF/SAHF instruction support",
2462   "Core multi-processor legacy mode",
2463   "",
2464   "",
2465   "",
2466   "Advanced Bit Manipulations: LZCNT",
2467   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2468   "Misaligned SSE mode",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   "",
2484   "",
2485   "",
2486   "",
2487   "",
2488   "",
2489   "",
2490   "",
2491   "",
2492   ""
2493 };
2494 
2495 void VM_Version::initialize_tsc(void) {
2496   ResourceMark rm;
2497 
2498   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2499   if (cpuid_brand_string_stub_blob == nullptr) {
2500     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2501   }
2502   CodeBuffer c(cpuid_brand_string_stub_blob);
2503   VM_Version_StubGenerator g(&c);
2504   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2505                                    g.generate_getCPUIDBrandString());
2506 }
2507 
2508 const char* VM_Version::cpu_model_description(void) {
2509   uint32_t cpu_family = extended_cpu_family();
2510   uint32_t cpu_model = extended_cpu_model();
2511   const char* model = nullptr;
2512 
2513   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2514     for (uint32_t i = 0; i <= cpu_model; i++) {
2515       model = _model_id_pentium_pro[i];
2516       if (model == nullptr) {
2517         break;
2518       }
2519     }
2520   }
2521   return model;
2522 }
2523 
2524 const char* VM_Version::cpu_brand_string(void) {
2525   if (_cpu_brand_string == nullptr) {
2526     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2527     if (nullptr == _cpu_brand_string) {
2528       return nullptr;
2529     }
2530     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2531     if (ret_val != OS_OK) {
2532       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2533       _cpu_brand_string = nullptr;
2534     }
2535   }
2536   return _cpu_brand_string;
2537 }
2538 
2539 const char* VM_Version::cpu_brand(void) {
2540   const char*  brand  = nullptr;
2541 
2542   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2543     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2544     brand = _brand_id[0];
2545     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2546       brand = _brand_id[i];
2547     }
2548   }
2549   return brand;
2550 }
2551 
2552 bool VM_Version::cpu_is_em64t(void) {
2553   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2554 }
2555 
2556 bool VM_Version::is_netburst(void) {
2557   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2558 }
2559 
2560 bool VM_Version::supports_tscinv_ext(void) {
2561   if (!supports_tscinv_bit()) {
2562     return false;
2563   }
2564 
2565   if (is_intel()) {
2566     return true;
2567   }
2568 
2569   if (is_amd()) {
2570     return !is_amd_Barcelona();
2571   }
2572 
2573   if (is_hygon()) {
2574     return true;
2575   }
2576 
2577   return false;
2578 }
2579 
2580 void VM_Version::resolve_cpu_information_details(void) {
2581 
2582   // in future we want to base this information on proper cpu
2583   // and cache topology enumeration such as:
2584   // Intel 64 Architecture Processor Topology Enumeration
2585   // which supports system cpu and cache topology enumeration
2586   // either using 2xAPICIDs or initial APICIDs
2587 
2588   // currently only rough cpu information estimates
2589   // which will not necessarily reflect the exact configuration of the system
2590 
2591   // this is the number of logical hardware threads
2592   // visible to the operating system
2593   _no_of_threads = os::processor_count();
2594 
2595   // find out number of threads per cpu package
2596   int threads_per_package = threads_per_core() * cores_per_cpu();
2597 
2598   // use amount of threads visible to the process in order to guess number of sockets
2599   _no_of_sockets = _no_of_threads / threads_per_package;
2600 
2601   // process might only see a subset of the total number of threads
2602   // from a single processor package. Virtualization/resource management for example.
2603   // If so then just write a hard 1 as num of pkgs.
2604   if (0 == _no_of_sockets) {
2605     _no_of_sockets = 1;
2606   }
2607 
2608   // estimate the number of cores
2609   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2610 }
2611 
2612 
2613 const char* VM_Version::cpu_family_description(void) {
2614   int cpu_family_id = extended_cpu_family();
2615   if (is_amd()) {
2616     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2617       return _family_id_amd[cpu_family_id];
2618     }
2619   }
2620   if (is_intel()) {
2621     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2622       return cpu_model_description();
2623     }
2624     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2625       return _family_id_intel[cpu_family_id];
2626     }
2627   }
2628   if (is_hygon()) {
2629     return "Dhyana";
2630   }
2631   return "Unknown x86";
2632 }
2633 
2634 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2635   assert(buf != nullptr, "buffer is null!");
2636   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2637 
2638   const char* cpu_type = nullptr;
2639   const char* x64 = nullptr;
2640 
2641   if (is_intel()) {
2642     cpu_type = "Intel";
2643     x64 = cpu_is_em64t() ? " Intel64" : "";
2644   } else if (is_amd()) {
2645     cpu_type = "AMD";
2646     x64 = cpu_is_em64t() ? " AMD64" : "";
2647   } else if (is_hygon()) {
2648     cpu_type = "Hygon";
2649     x64 = cpu_is_em64t() ? " AMD64" : "";
2650   } else {
2651     cpu_type = "Unknown x86";
2652     x64 = cpu_is_em64t() ? " x86_64" : "";
2653   }
2654 
2655   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2656     cpu_type,
2657     cpu_family_description(),
2658     supports_ht() ? " (HT)" : "",
2659     supports_sse3() ? " SSE3" : "",
2660     supports_ssse3() ? " SSSE3" : "",
2661     supports_sse4_1() ? " SSE4.1" : "",
2662     supports_sse4_2() ? " SSE4.2" : "",
2663     supports_sse4a() ? " SSE4A" : "",
2664     is_netburst() ? " Netburst" : "",
2665     is_intel_family_core() ? " Core" : "",
2666     x64);
2667 
2668   return OS_OK;
2669 }
2670 
2671 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2672   assert(buf != nullptr, "buffer is null!");
2673   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2674   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2675 
2676   // invoke newly generated asm code to fetch CPU Brand String
2677   getCPUIDBrandString_stub(&_cpuid_info);
2678 
2679   // fetch results into buffer
2680   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2681   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2682   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2683   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2684   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2685   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2686   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2687   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2688   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2689   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2690   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2691   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2692 
2693   return OS_OK;
2694 }
2695 
2696 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2697   guarantee(buf != nullptr, "buffer is null!");
2698   guarantee(buf_len > 0, "buffer len not enough!");
2699 
2700   unsigned int flag = 0;
2701   unsigned int fi = 0;
2702   size_t       written = 0;
2703   const char*  prefix = "";
2704 
2705 #define WRITE_TO_BUF(string)                                                          \
2706   {                                                                                   \
2707     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2708     if (res < 0) {                                                                    \
2709       return buf_len - 1;                                                             \
2710     }                                                                                 \
2711     written += res;                                                                   \
2712     if (prefix[0] == '\0') {                                                          \
2713       prefix = ", ";                                                                  \
2714     }                                                                                 \
2715   }
2716 
2717   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2718     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2719       continue; /* no hyperthreading */
2720     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2721       continue; /* no fast system call */
2722     }
2723     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2724       WRITE_TO_BUF(_feature_edx_id[fi]);
2725     }
2726   }
2727 
2728   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2729     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2730       WRITE_TO_BUF(_feature_ecx_id[fi]);
2731     }
2732   }
2733 
2734   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2735     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2736       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2737     }
2738   }
2739 
2740   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2741     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2742       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2743     }
2744   }
2745 
2746   if (supports_tscinv_bit()) {
2747       WRITE_TO_BUF("Invariant TSC");
2748   }
2749 
2750   return written;
2751 }
2752 
2753 /**
2754  * Write a detailed description of the cpu to a given buffer, including
2755  * feature set.
2756  */
2757 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2758   assert(buf != nullptr, "buffer is null!");
2759   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2760 
2761   static const char* unknown = "<unknown>";
2762   char               vendor_id[VENDOR_LENGTH];
2763   const char*        family = nullptr;
2764   const char*        model = nullptr;
2765   const char*        brand = nullptr;
2766   int                outputLen = 0;
2767 
2768   family = cpu_family_description();
2769   if (family == nullptr) {
2770     family = unknown;
2771   }
2772 
2773   model = cpu_model_description();
2774   if (model == nullptr) {
2775     model = unknown;
2776   }
2777 
2778   brand = cpu_brand_string();
2779 
2780   if (brand == nullptr) {
2781     brand = cpu_brand();
2782     if (brand == nullptr) {
2783       brand = unknown;
2784     }
2785   }
2786 
2787   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2788   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2789   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2790   vendor_id[VENDOR_LENGTH-1] = '\0';
2791 
2792   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2793     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2794     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2795     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2796     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2797     "Supports: ",
2798     brand,
2799     vendor_id,
2800     family,
2801     extended_cpu_family(),
2802     model,
2803     extended_cpu_model(),
2804     cpu_stepping(),
2805     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2806     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2807     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2808     _cpuid_info.std_cpuid1_eax.value,
2809     _cpuid_info.std_cpuid1_ebx.value,
2810     _cpuid_info.std_cpuid1_ecx.value,
2811     _cpuid_info.std_cpuid1_edx.value,
2812     _cpuid_info.ext_cpuid1_eax,
2813     _cpuid_info.ext_cpuid1_ebx,
2814     _cpuid_info.ext_cpuid1_ecx,
2815     _cpuid_info.ext_cpuid1_edx);
2816 
2817   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2818     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2819     return OS_ERR;
2820   }
2821 
2822   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2823 
2824   return OS_OK;
2825 }
2826 
2827 
2828 // Fill in Abstract_VM_Version statics
2829 void VM_Version::initialize_cpu_information() {
2830   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2831   assert(!_initialized, "shouldn't be initialized yet");
2832   resolve_cpu_information_details();
2833 
2834   // initialize cpu_name and cpu_desc
2835   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2836   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2837   _initialized = true;
2838 }
2839 
2840 /**
2841  *  For information about extracting the frequency from the cpu brand string, please see:
2842  *
2843  *    Intel Processor Identification and the CPUID Instruction
2844  *    Application Note 485
2845  *    May 2012
2846  *
2847  * The return value is the frequency in Hz.
2848  */
2849 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2850   const char* const brand_string = cpu_brand_string();
2851   if (brand_string == nullptr) {
2852     return 0;
2853   }
2854   const int64_t MEGA = 1000000;
2855   int64_t multiplier = 0;
2856   int64_t frequency = 0;
2857   uint8_t idx = 0;
2858   // The brand string buffer is at most 48 bytes.
2859   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2860   for (; idx < 48-2; ++idx) {
2861     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2862     // Search brand string for "yHz" where y is M, G, or T.
2863     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2864       if (brand_string[idx] == 'M') {
2865         multiplier = MEGA;
2866       } else if (brand_string[idx] == 'G') {
2867         multiplier = MEGA * 1000;
2868       } else if (brand_string[idx] == 'T') {
2869         multiplier = MEGA * MEGA;
2870       }
2871       break;
2872     }
2873   }
2874   if (multiplier > 0) {
2875     // Compute frequency (in Hz) from brand string.
2876     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2877       frequency =  (brand_string[idx-4] - '0') * multiplier;
2878       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2879       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2880     } else { // format is "xxxx"
2881       frequency =  (brand_string[idx-4] - '0') * 1000;
2882       frequency += (brand_string[idx-3] - '0') * 100;
2883       frequency += (brand_string[idx-2] - '0') * 10;
2884       frequency += (brand_string[idx-1] - '0');
2885       frequency *= multiplier;
2886     }
2887   }
2888   return frequency;
2889 }
2890 
2891 
2892 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2893   if (_max_qualified_cpu_frequency == 0) {
2894     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2895   }
2896   return _max_qualified_cpu_frequency;
2897 }
2898 
2899 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2900   VM_Features vm_features;
2901   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2902     vm_features.set_feature(CPU_CX8);
2903   if (std_cpuid1_edx.bits.cmov != 0)
2904     vm_features.set_feature(CPU_CMOV);
2905   if (std_cpuid1_edx.bits.clflush != 0)
2906     vm_features.set_feature(CPU_FLUSH);
2907   // clflush should always be available on x86_64
2908   // if not we are in real trouble because we rely on it
2909   // to flush the code cache.
2910   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2911   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2912       ext_cpuid1_edx.bits.fxsr != 0))
2913     vm_features.set_feature(CPU_FXSR);
2914   // HT flag is set for multi-core processors also.
2915   if (threads_per_core() > 1)
2916     vm_features.set_feature(CPU_HT);
2917   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2918       ext_cpuid1_edx.bits.mmx != 0))
2919     vm_features.set_feature(CPU_MMX);
2920   if (std_cpuid1_edx.bits.sse != 0)
2921     vm_features.set_feature(CPU_SSE);
2922   if (std_cpuid1_edx.bits.sse2 != 0)
2923     vm_features.set_feature(CPU_SSE2);
2924   if (std_cpuid1_ecx.bits.sse3 != 0)
2925     vm_features.set_feature(CPU_SSE3);
2926   if (std_cpuid1_ecx.bits.ssse3 != 0)
2927     vm_features.set_feature(CPU_SSSE3);
2928   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2929     vm_features.set_feature(CPU_SSE4_1);
2930   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2931     vm_features.set_feature(CPU_SSE4_2);
2932   if (std_cpuid1_ecx.bits.popcnt != 0)
2933     vm_features.set_feature(CPU_POPCNT);
2934   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2935       xem_xcr0_eax.bits.apx_f != 0) {
2936     vm_features.set_feature(CPU_APX_F);
2937   }
2938   if (std_cpuid1_ecx.bits.avx != 0 &&
2939       std_cpuid1_ecx.bits.osxsave != 0 &&
2940       xem_xcr0_eax.bits.sse != 0 &&
2941       xem_xcr0_eax.bits.ymm != 0) {
2942     vm_features.set_feature(CPU_AVX);
2943     vm_features.set_feature(CPU_VZEROUPPER);
2944     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2945       vm_features.set_feature(CPU_SHA512);
2946     if (std_cpuid1_ecx.bits.f16c != 0)
2947       vm_features.set_feature(CPU_F16C);
2948     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2949       vm_features.set_feature(CPU_AVX2);
2950       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2951         vm_features.set_feature(CPU_AVX_IFMA);
2952     }
2953     if (sef_cpuid7_ecx.bits.gfni != 0)
2954         vm_features.set_feature(CPU_GFNI);
2955     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2956         xem_xcr0_eax.bits.opmask != 0 &&
2957         xem_xcr0_eax.bits.zmm512 != 0 &&
2958         xem_xcr0_eax.bits.zmm32 != 0) {
2959       vm_features.set_feature(CPU_AVX512F);
2960       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2961         vm_features.set_feature(CPU_AVX512CD);
2962       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2963         vm_features.set_feature(CPU_AVX512DQ);
2964       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2965         vm_features.set_feature(CPU_AVX512_IFMA);
2966       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2967         vm_features.set_feature(CPU_AVX512PF);
2968       if (sef_cpuid7_ebx.bits.avx512er != 0)
2969         vm_features.set_feature(CPU_AVX512ER);
2970       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2971         vm_features.set_feature(CPU_AVX512BW);
2972       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2973         vm_features.set_feature(CPU_AVX512VL);
2974       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2975         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2976       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2977         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2978       if (sef_cpuid7_ecx.bits.vaes != 0)
2979         vm_features.set_feature(CPU_AVX512_VAES);
2980       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2981         vm_features.set_feature(CPU_AVX512_VNNI);
2982       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2983         vm_features.set_feature(CPU_AVX512_BITALG);
2984       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2985         vm_features.set_feature(CPU_AVX512_VBMI);
2986       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2987         vm_features.set_feature(CPU_AVX512_VBMI2);
2988     }
2989     if (is_intel()) {
2990       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2991           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2992           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2993           xem_xcr0_eax.bits.opmask != 0 &&
2994           xem_xcr0_eax.bits.zmm512 != 0 &&
2995           xem_xcr0_eax.bits.zmm32 != 0) {
2996         vm_features.set_feature(CPU_AVX10_1);
2997         vm_features.set_feature(CPU_AVX512F);
2998         vm_features.set_feature(CPU_AVX512CD);
2999         vm_features.set_feature(CPU_AVX512DQ);
3000         vm_features.set_feature(CPU_AVX512PF);
3001         vm_features.set_feature(CPU_AVX512ER);
3002         vm_features.set_feature(CPU_AVX512BW);
3003         vm_features.set_feature(CPU_AVX512VL);
3004         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3005         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3006         vm_features.set_feature(CPU_AVX512_VAES);
3007         vm_features.set_feature(CPU_AVX512_VNNI);
3008         vm_features.set_feature(CPU_AVX512_BITALG);
3009         vm_features.set_feature(CPU_AVX512_VBMI);
3010         vm_features.set_feature(CPU_AVX512_VBMI2);
3011         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3012           vm_features.set_feature(CPU_AVX10_2);
3013         }
3014       }
3015     }
3016   }
3017 
3018   if (std_cpuid1_ecx.bits.hv != 0)
3019     vm_features.set_feature(CPU_HV);
3020   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3021     vm_features.set_feature(CPU_BMI1);
3022   if (std_cpuid1_edx.bits.tsc != 0)
3023     vm_features.set_feature(CPU_TSC);
3024   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3025     vm_features.set_feature(CPU_TSCINV_BIT);
3026   if (std_cpuid1_ecx.bits.aes != 0)
3027     vm_features.set_feature(CPU_AES);
3028   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3029     vm_features.set_feature(CPU_LZCNT);
3030   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3031     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3032   if (sef_cpuid7_ebx.bits.erms != 0)
3033     vm_features.set_feature(CPU_ERMS);
3034   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3035     vm_features.set_feature(CPU_FSRM);
3036   if (std_cpuid1_ecx.bits.clmul != 0)
3037     vm_features.set_feature(CPU_CLMUL);
3038   if (sef_cpuid7_ebx.bits.rtm != 0)
3039     vm_features.set_feature(CPU_RTM);
3040   if (sef_cpuid7_ebx.bits.adx != 0)
3041      vm_features.set_feature(CPU_ADX);
3042   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3043     vm_features.set_feature(CPU_BMI2);
3044   if (sef_cpuid7_ebx.bits.sha != 0)
3045     vm_features.set_feature(CPU_SHA);
3046   if (std_cpuid1_ecx.bits.fma != 0)
3047     vm_features.set_feature(CPU_FMA);
3048   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3049     vm_features.set_feature(CPU_FLUSHOPT);
3050   if (sef_cpuid7_ebx.bits.clwb != 0)
3051     vm_features.set_feature(CPU_CLWB);
3052   if (ext_cpuid1_edx.bits.rdtscp != 0)
3053     vm_features.set_feature(CPU_RDTSCP);
3054   if (sef_cpuid7_ecx.bits.rdpid != 0)
3055     vm_features.set_feature(CPU_RDPID);
3056 
3057   // AMD|Hygon additional features.
3058   if (is_amd_family()) {
3059     // PREFETCHW was checked above, check TDNOW here.
3060     if ((ext_cpuid1_edx.bits.tdnow != 0))
3061       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3062     if (ext_cpuid1_ecx.bits.sse4a != 0)
3063       vm_features.set_feature(CPU_SSE4A);
3064   }
3065 
3066   // Intel additional features.
3067   if (is_intel()) {
3068     if (sef_cpuid7_edx.bits.serialize != 0)
3069       vm_features.set_feature(CPU_SERIALIZE);
3070     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3071       vm_features.set_feature(CPU_AVX512_FP16);
3072   }
3073 
3074   // ZX additional features.
3075   if (is_zx()) {
3076     // We do not know if these are supported by ZX, so we cannot trust
3077     // common CPUID bit for them.
3078     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3079     vm_features.clear_feature(CPU_CLWB);
3080   }
3081 
3082   // Protection key features.
3083   if (sef_cpuid7_ecx.bits.pku != 0) {
3084     vm_features.set_feature(CPU_PKU);
3085   }
3086   if (sef_cpuid7_ecx.bits.ospke != 0) {
3087     vm_features.set_feature(CPU_OSPKE);
3088   }
3089 
3090   // Control flow enforcement (CET) features.
3091   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3092     vm_features.set_feature(CPU_CET_SS);
3093   }
3094   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3095     vm_features.set_feature(CPU_CET_IBT);
3096   }
3097 
3098   // Composite features.
3099   if (supports_tscinv_bit() &&
3100       ((is_amd_family() && !is_amd_Barcelona()) ||
3101        is_intel_tsc_synched_at_init())) {
3102     vm_features.set_feature(CPU_TSCINV);
3103   }
3104   return vm_features;
3105 }
3106 
3107 bool VM_Version::os_supports_avx_vectors() {
3108   bool retVal = false;
3109   int nreg = 4;
3110   if (supports_evex()) {
3111     // Verify that OS save/restore all bits of EVEX registers
3112     // during signal processing.
3113     retVal = true;
3114     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3115       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3116         retVal = false;
3117         break;
3118       }
3119     }
3120   } else if (supports_avx()) {
3121     // Verify that OS save/restore all bits of AVX registers
3122     // during signal processing.
3123     retVal = true;
3124     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3125       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3126         retVal = false;
3127         break;
3128       }
3129     }
3130     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3131     if (retVal == false) {
3132       // Verify that OS save/restore all bits of EVEX registers
3133       // during signal processing.
3134       retVal = true;
3135       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3136         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3137           retVal = false;
3138           break;
3139         }
3140       }
3141     }
3142   }
3143   return retVal;
3144 }
3145 
3146 bool VM_Version::os_supports_apx_egprs() {
3147   if (!supports_apx_f()) {
3148     return false;
3149   }
3150   // Enable APX support for product builds after
3151   // completion of planned features listed in JDK-8329030.
3152 #if !defined(PRODUCT)
3153   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3154       _cpuid_info.apx_save[1] != egpr_test_value()) {
3155     return false;
3156   }
3157   return true;
3158 #else
3159   return false;
3160 #endif
3161 }
3162 
3163 uint VM_Version::cores_per_cpu() {
3164   uint result = 1;
3165   if (is_intel()) {
3166     bool supports_topology = supports_processor_topology();
3167     if (supports_topology) {
3168       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3169                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3170     }
3171     if (!supports_topology || result == 0) {
3172       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3173     }
3174   } else if (is_amd_family()) {
3175     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3176   } else if (is_zx()) {
3177     bool supports_topology = supports_processor_topology();
3178     if (supports_topology) {
3179       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3180                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3181     }
3182     if (!supports_topology || result == 0) {
3183       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3184     }
3185   }
3186   return result;
3187 }
3188 
3189 uint VM_Version::threads_per_core() {
3190   uint result = 1;
3191   if (is_intel() && supports_processor_topology()) {
3192     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3193   } else if (is_zx() && supports_processor_topology()) {
3194     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3195   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3196     if (cpu_family() >= 0x17) {
3197       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3198     } else {
3199       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3200                  cores_per_cpu();
3201     }
3202   }
3203   return (result == 0 ? 1 : result);
3204 }
3205 
3206 uint VM_Version::L1_line_size() {
3207   uint result = 0;
3208   if (is_intel()) {
3209     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3210   } else if (is_amd_family()) {
3211     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3212   } else if (is_zx()) {
3213     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3214   }
3215   if (result < 32) // not defined ?
3216     result = 32;   // 32 bytes by default on x86 and other x64
3217   return result;
3218 }
3219 
3220 bool VM_Version::is_intel_tsc_synched_at_init() {
3221   if (is_intel_family_core()) {
3222     uint32_t ext_model = extended_cpu_model();
3223     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3224         ext_model == CPU_MODEL_WESTMERE_EP    ||
3225         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3226         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3227       // <= 2-socket invariant tsc support. EX versions are usually used
3228       // in > 2-socket systems and likely don't synchronize tscs at
3229       // initialization.
3230       // Code that uses tsc values must be prepared for them to arbitrarily
3231       // jump forward or backward.
3232       return true;
3233     }
3234   }
3235   return false;
3236 }
3237 
3238 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3239   // Hardware prefetching (distance/size in bytes):
3240   // Pentium 3 -  64 /  32
3241   // Pentium 4 - 256 / 128
3242   // Athlon    -  64 /  32 ????
3243   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3244   // Core      - 128 /  64
3245   //
3246   // Software prefetching (distance in bytes / instruction with best score):
3247   // Pentium 3 - 128 / prefetchnta
3248   // Pentium 4 - 512 / prefetchnta
3249   // Athlon    - 128 / prefetchnta
3250   // Opteron   - 256 / prefetchnta
3251   // Core      - 256 / prefetchnta
3252   // It will be used only when AllocatePrefetchStyle > 0
3253 
3254   if (is_amd_family()) { // AMD | Hygon
3255     if (supports_sse2()) {
3256       return 256; // Opteron
3257     } else {
3258       return 128; // Athlon
3259     }
3260   } else { // Intel
3261     if (supports_sse3() && cpu_family() == 6) {
3262       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3263         return 192;
3264       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3265         return 384;
3266       }
3267     }
3268     if (supports_sse2()) {
3269       if (cpu_family() == 6) {
3270         return 256; // Pentium M, Core, Core2
3271       } else {
3272         return 512; // Pentium 4
3273       }
3274     } else {
3275       return 128; // Pentium 3 (and all other old CPUs)
3276     }
3277   }
3278 }
3279 
3280 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3281   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3282   switch (id) {
3283   case vmIntrinsics::_floatToFloat16:
3284   case vmIntrinsics::_float16ToFloat:
3285     if (!supports_float16()) {
3286       return false;
3287     }
3288     break;
3289   default:
3290     break;
3291   }
3292   return true;
3293 }
3294 
3295 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3296   int i = 0;
3297   ss.join([&]() {
3298     const char* str = nullptr;
3299     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3300       if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3301         str = _features_names[i];
3302       }
3303       i += 1;
3304     }
3305     return str;
3306   }, ", ");
3307 }
3308 
3309 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3310   VM_Features* features = (VM_Features*)features_buffer;
3311   insert_features_names(*features, ss);
3312 }
3313 
3314 void VM_Version::get_missing_features_name(void* features_buffer, stringStream& ss) {
3315   VM_Features* features_to_test = (VM_Features*)features_buffer;
3316   int i = 0;
3317   ss.join([&]() {
3318     const char* str = nullptr;
3319     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3320       Feature_Flag flag = (Feature_Flag)i;
3321       if (features_to_test->supports_feature(flag) && !_features.supports_feature(flag)) {
3322         str = _features_names[i];
3323       }
3324       i += 1;
3325     }
3326     return str;
3327   }, ", ");
3328 }
3329 
3330 int VM_Version::cpu_features_size() {
3331   return sizeof(VM_Features);
3332 }
3333 
3334 void VM_Version::store_cpu_features(void* buf) {
3335   VM_Features copy = _features;
3336   copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3337   memcpy(buf, &copy, sizeof(VM_Features));
3338 }
3339 
3340 bool VM_Version::supports_features(void* features_buffer) {
3341   VM_Features* features_to_test = (VM_Features*)features_buffer;
3342   return _features.supports_features(features_to_test);
3343 }