1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/powerOfTwo.hpp"
  42 #include "utilities/virtualizationSupport.hpp"
  43 
  44 int VM_Version::_cpu;
  45 int VM_Version::_model;
  46 int VM_Version::_stepping;
  47 bool VM_Version::_has_intel_jcc_erratum;
  48 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  49 
  50 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  51 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  52 #undef DECLARE_CPU_FEATURE_FLAG
  53 
  54 // Address of instruction which causes SEGV
  55 address VM_Version::_cpuinfo_segv_addr = nullptr;
  56 // Address of instruction after the one which causes SEGV
  57 address VM_Version::_cpuinfo_cont_addr = nullptr;
  58 // Address of instruction which causes APX specific SEGV
  59 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
  60 // Address of instruction after the one which causes APX specific SEGV
  61 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
  62 
  63 static BufferBlob* stub_blob;
  64 static const int stub_size = 2000;
  65 
  66 int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;
  67 
  68 VM_Version::VM_Features VM_Version::_features;
  69 VM_Version::VM_Features VM_Version::_cpu_features;
  70 
  71 extern "C" {
  72   typedef void (*get_cpu_info_stub_t)(void*);
  73   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  74   typedef void (*clear_apx_test_state_t)(void);
  75 }
  76 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  77 static detect_virt_stub_t detect_virt_stub = nullptr;
  78 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  79 
  80 bool VM_Version::supports_clflush() {
  81   // clflush should always be available on x86_64
  82   // if not we are in real trouble because we rely on it
  83   // to flush the code cache.
  84   // Unfortunately, Assembler::clflush is currently called as part
  85   // of generation of the code cache flush routine. This happens
  86   // under Universe::init before the processor features are set
  87   // up. Assembler::flush calls this routine to check that clflush
  88   // is allowed. So, we give the caller a free pass if Universe init
  89   // is still in progress.
  90   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  91   return true;
  92 }
  93 
  94 #define CPUID_STANDARD_FN   0x0
  95 #define CPUID_STANDARD_FN_1 0x1
  96 #define CPUID_STANDARD_FN_4 0x4
  97 #define CPUID_STANDARD_FN_B 0xb
  98 
  99 #define CPUID_EXTENDED_FN   0x80000000
 100 #define CPUID_EXTENDED_FN_1 0x80000001
 101 #define CPUID_EXTENDED_FN_2 0x80000002
 102 #define CPUID_EXTENDED_FN_3 0x80000003
 103 #define CPUID_EXTENDED_FN_4 0x80000004
 104 #define CPUID_EXTENDED_FN_7 0x80000007
 105 #define CPUID_EXTENDED_FN_8 0x80000008
 106 
 107 class VM_Version_StubGenerator: public StubCodeGenerator {
 108  public:
 109 
 110   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 111 
 112   address clear_apx_test_state() {
 113 #   define __ _masm->
 114     address start = __ pc();
 115     // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
 116     // handling guarantees that preserved register values post signal handling were
 117     // re-instantiated by operating system and not because they were not modified externally.
 118 
 119     bool save_apx = UseAPX;
 120     VM_Version::set_apx_cpuFeatures();
 121     UseAPX = true;
 122     // EGPR state save/restoration.
 123     __ mov64(r16, 0L);
 124     __ mov64(r31, 0L);
 125     UseAPX = save_apx;
 126     VM_Version::clean_cpuFeatures();
 127     __ ret(0);
 128     return start;
 129   }
 130 
 131   address generate_get_cpu_info() {
 132     // Flags to test CPU type.
 133     const uint32_t HS_EFL_AC = 0x40000;
 134     const uint32_t HS_EFL_ID = 0x200000;
 135     // Values for when we don't have a CPUID instruction.
 136     const int      CPU_FAMILY_SHIFT = 8;
 137     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 138     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 139     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 140 
 141     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24;
 142     Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
 143     Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
 144     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 145 
 146     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 147 #   define __ _masm->
 148 
 149     address start = __ pc();
 150 
 151     //
 152     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 153     //
 154     // rcx and rdx are first and second argument registers on windows
 155 
 156     __ push(rbp);
 157     __ mov(rbp, c_rarg0); // cpuid_info address
 158     __ push(rbx);
 159     __ push(rsi);
 160     __ pushf();          // preserve rbx, and flags
 161     __ pop(rax);
 162     __ push(rax);
 163     __ mov(rcx, rax);
 164     //
 165     // if we are unable to change the AC flag, we have a 386
 166     //
 167     __ xorl(rax, HS_EFL_AC);
 168     __ push(rax);
 169     __ popf();
 170     __ pushf();
 171     __ pop(rax);
 172     __ cmpptr(rax, rcx);
 173     __ jccb(Assembler::notEqual, detect_486);
 174 
 175     __ movl(rax, CPU_FAMILY_386);
 176     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 177     __ jmp(done);
 178 
 179     //
 180     // If we are unable to change the ID flag, we have a 486 which does
 181     // not support the "cpuid" instruction.
 182     //
 183     __ bind(detect_486);
 184     __ mov(rax, rcx);
 185     __ xorl(rax, HS_EFL_ID);
 186     __ push(rax);
 187     __ popf();
 188     __ pushf();
 189     __ pop(rax);
 190     __ cmpptr(rcx, rax);
 191     __ jccb(Assembler::notEqual, detect_586);
 192 
 193     __ bind(cpu486);
 194     __ movl(rax, CPU_FAMILY_486);
 195     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 196     __ jmp(done);
 197 
 198     //
 199     // At this point, we have a chip which supports the "cpuid" instruction
 200     //
 201     __ bind(detect_586);
 202     __ xorl(rax, rax);
 203     __ cpuid();
 204     __ orl(rax, rax);
 205     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 206                                         // value of at least 1, we give up and
 207                                         // assume a 486
 208     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 209     __ movl(Address(rsi, 0), rax);
 210     __ movl(Address(rsi, 4), rbx);
 211     __ movl(Address(rsi, 8), rcx);
 212     __ movl(Address(rsi,12), rdx);
 213 
 214     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 215     __ jccb(Assembler::belowEqual, std_cpuid4);
 216 
 217     //
 218     // cpuid(0xB) Processor Topology
 219     //
 220     __ movl(rax, 0xb);
 221     __ xorl(rcx, rcx);   // Threads level
 222     __ cpuid();
 223 
 224     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 225     __ movl(Address(rsi, 0), rax);
 226     __ movl(Address(rsi, 4), rbx);
 227     __ movl(Address(rsi, 8), rcx);
 228     __ movl(Address(rsi,12), rdx);
 229 
 230     __ movl(rax, 0xb);
 231     __ movl(rcx, 1);     // Cores level
 232     __ cpuid();
 233     __ push(rax);
 234     __ andl(rax, 0x1f);  // Determine if valid topology level
 235     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 236     __ andl(rax, 0xffff);
 237     __ pop(rax);
 238     __ jccb(Assembler::equal, std_cpuid4);
 239 
 240     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 241     __ movl(Address(rsi, 0), rax);
 242     __ movl(Address(rsi, 4), rbx);
 243     __ movl(Address(rsi, 8), rcx);
 244     __ movl(Address(rsi,12), rdx);
 245 
 246     __ movl(rax, 0xb);
 247     __ movl(rcx, 2);     // Packages level
 248     __ cpuid();
 249     __ push(rax);
 250     __ andl(rax, 0x1f);  // Determine if valid topology level
 251     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 252     __ andl(rax, 0xffff);
 253     __ pop(rax);
 254     __ jccb(Assembler::equal, std_cpuid4);
 255 
 256     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 257     __ movl(Address(rsi, 0), rax);
 258     __ movl(Address(rsi, 4), rbx);
 259     __ movl(Address(rsi, 8), rcx);
 260     __ movl(Address(rsi,12), rdx);
 261 
 262     //
 263     // cpuid(0x4) Deterministic cache params
 264     //
 265     __ bind(std_cpuid4);
 266     __ movl(rax, 4);
 267     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 268     __ jccb(Assembler::greater, std_cpuid1);
 269 
 270     __ xorl(rcx, rcx);   // L1 cache
 271     __ cpuid();
 272     __ push(rax);
 273     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 274     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 275     __ pop(rax);
 276     __ jccb(Assembler::equal, std_cpuid1);
 277 
 278     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 279     __ movl(Address(rsi, 0), rax);
 280     __ movl(Address(rsi, 4), rbx);
 281     __ movl(Address(rsi, 8), rcx);
 282     __ movl(Address(rsi,12), rdx);
 283 
 284     //
 285     // Standard cpuid(0x1)
 286     //
 287     __ bind(std_cpuid1);
 288     __ movl(rax, 1);
 289     __ cpuid();
 290     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 291     __ movl(Address(rsi, 0), rax);
 292     __ movl(Address(rsi, 4), rbx);
 293     __ movl(Address(rsi, 8), rcx);
 294     __ movl(Address(rsi,12), rdx);
 295 
 296     //
 297     // Check if OS has enabled XGETBV instruction to access XCR0
 298     // (OSXSAVE feature flag) and CPU supports AVX
 299     //
 300     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 301     __ cmpl(rcx, 0x18000000);
 302     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 303 
 304     //
 305     // XCR0, XFEATURE_ENABLED_MASK register
 306     //
 307     __ xorl(rcx, rcx);   // zero for XCR0 register
 308     __ xgetbv();
 309     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 310     __ movl(Address(rsi, 0), rax);
 311     __ movl(Address(rsi, 4), rdx);
 312 
 313     //
 314     // cpuid(0x7) Structured Extended Features Enumeration Leaf.
 315     //
 316     __ bind(sef_cpuid);
 317     __ movl(rax, 7);
 318     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 319     __ jccb(Assembler::greater, ext_cpuid);
 320     // ECX = 0
 321     __ xorl(rcx, rcx);
 322     __ cpuid();
 323     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 324     __ movl(Address(rsi, 0), rax);
 325     __ movl(Address(rsi, 4), rbx);
 326     __ movl(Address(rsi, 8), rcx);
 327     __ movl(Address(rsi, 12), rdx);
 328 
 329     //
 330     // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
 331     //
 332     __ bind(sefsl1_cpuid);
 333     __ movl(rax, 7);
 334     __ movl(rcx, 1);
 335     __ cpuid();
 336     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 337     __ movl(Address(rsi, 0), rax);
 338     __ movl(Address(rsi, 4), rdx);
 339 
 340     //
 341     // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
 342     //
 343     __ bind(std_cpuid24);
 344     __ movl(rax, 0x24);
 345     __ movl(rcx, 0);
 346     __ cpuid();
 347     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
 348     __ movl(Address(rsi, 0), rax);
 349     __ movl(Address(rsi, 4), rbx);
 350 
 351     //
 352     // Extended cpuid(0x80000000)
 353     //
 354     __ bind(ext_cpuid);
 355     __ movl(rax, 0x80000000);
 356     __ cpuid();
 357     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 358     __ jcc(Assembler::belowEqual, done);
 359     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 360     __ jcc(Assembler::belowEqual, ext_cpuid1);
 361     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 362     __ jccb(Assembler::belowEqual, ext_cpuid5);
 363     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 364     __ jccb(Assembler::belowEqual, ext_cpuid7);
 365     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 366     __ jccb(Assembler::belowEqual, ext_cpuid8);
 367     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 368     __ jccb(Assembler::below, ext_cpuid8);
 369     //
 370     // Extended cpuid(0x8000001E)
 371     //
 372     __ movl(rax, 0x8000001E);
 373     __ cpuid();
 374     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 375     __ movl(Address(rsi, 0), rax);
 376     __ movl(Address(rsi, 4), rbx);
 377     __ movl(Address(rsi, 8), rcx);
 378     __ movl(Address(rsi,12), rdx);
 379 
 380     //
 381     // Extended cpuid(0x80000008)
 382     //
 383     __ bind(ext_cpuid8);
 384     __ movl(rax, 0x80000008);
 385     __ cpuid();
 386     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 387     __ movl(Address(rsi, 0), rax);
 388     __ movl(Address(rsi, 4), rbx);
 389     __ movl(Address(rsi, 8), rcx);
 390     __ movl(Address(rsi,12), rdx);
 391 
 392     //
 393     // Extended cpuid(0x80000007)
 394     //
 395     __ bind(ext_cpuid7);
 396     __ movl(rax, 0x80000007);
 397     __ cpuid();
 398     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 399     __ movl(Address(rsi, 0), rax);
 400     __ movl(Address(rsi, 4), rbx);
 401     __ movl(Address(rsi, 8), rcx);
 402     __ movl(Address(rsi,12), rdx);
 403 
 404     //
 405     // Extended cpuid(0x80000005)
 406     //
 407     __ bind(ext_cpuid5);
 408     __ movl(rax, 0x80000005);
 409     __ cpuid();
 410     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 411     __ movl(Address(rsi, 0), rax);
 412     __ movl(Address(rsi, 4), rbx);
 413     __ movl(Address(rsi, 8), rcx);
 414     __ movl(Address(rsi,12), rdx);
 415 
 416     //
 417     // Extended cpuid(0x80000001)
 418     //
 419     __ bind(ext_cpuid1);
 420     __ movl(rax, 0x80000001);
 421     __ cpuid();
 422     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 423     __ movl(Address(rsi, 0), rax);
 424     __ movl(Address(rsi, 4), rbx);
 425     __ movl(Address(rsi, 8), rcx);
 426     __ movl(Address(rsi,12), rdx);
 427 
 428     //
 429     // Check if OS has enabled XGETBV instruction to access XCR0
 430     // (OSXSAVE feature flag) and CPU supports APX
 431     //
 432     // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
 433     // and XCRO[19] bit for OS support to save/restore extended GPR state.
 434     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 435     __ movl(rax, 0x200000);
 436     __ andl(rax, Address(rsi, 4));
 437     __ jcc(Assembler::equal, vector_save_restore);
 438     // check _cpuid_info.xem_xcr0_eax.bits.apx_f
 439     __ movl(rax, 0x80000);
 440     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
 441     __ jcc(Assembler::equal, vector_save_restore);
 442 
 443 #ifndef PRODUCT
 444     bool save_apx = UseAPX;
 445     VM_Version::set_apx_cpuFeatures();
 446     UseAPX = true;
 447     __ mov64(r16, VM_Version::egpr_test_value());
 448     __ mov64(r31, VM_Version::egpr_test_value());
 449     __ xorl(rsi, rsi);
 450     VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
 451     // Generate SEGV
 452     __ movl(rax, Address(rsi, 0));
 453 
 454     VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
 455     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
 456     __ movq(Address(rsi, 0), r16);
 457     __ movq(Address(rsi, 8), r31);
 458 
 459     UseAPX = save_apx;
 460 #endif
 461     __ bind(vector_save_restore);
 462     //
 463     // Check if OS has enabled XGETBV instruction to access XCR0
 464     // (OSXSAVE feature flag) and CPU supports AVX
 465     //
 466     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 467     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 468     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 469     __ cmpl(rcx, 0x18000000);
 470     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 471 
 472     __ movl(rax, 0x6);
 473     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 474     __ cmpl(rax, 0x6);
 475     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 476 
 477     // we need to bridge farther than imm8, so we use this island as a thunk
 478     __ bind(done);
 479     __ jmp(wrapup);
 480 
 481     __ bind(start_simd_check);
 482     //
 483     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 484     // registers are not restored after a signal processing.
 485     // Generate SEGV here (reference through null)
 486     // and check upper YMM/ZMM bits after it.
 487     //
 488     int saved_useavx = UseAVX;
 489     int saved_usesse = UseSSE;
 490 
 491     // If UseAVX is uninitialized or is set by the user to include EVEX
 492     if (use_evex) {
 493       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 494       // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
 495       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 496       __ movl(rax, 0x10000);
 497       __ andl(rax, Address(rsi, 4));
 498       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 499       __ movl(rbx, 0x80000);
 500       __ andl(rbx, Address(rsi, 4));
 501       __ orl(rax, rbx);
 502       __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
 503       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 504       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 505       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 506       __ movl(rax, 0xE0);
 507       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 508       __ cmpl(rax, 0xE0);
 509       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 510 
 511       if (FLAG_IS_DEFAULT(UseAVX)) {
 512         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 513         __ movl(rax, Address(rsi, 0));
 514         __ cmpl(rax, 0x50654);              // If it is Skylake
 515         __ jcc(Assembler::equal, legacy_setup);
 516       }
 517       // EVEX setup: run in lowest evex mode
 518       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 519       UseAVX = 3;
 520       UseSSE = 2;
 521 #ifdef _WINDOWS
 522       // xmm5-xmm15 are not preserved by caller on windows
 523       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 524       __ subptr(rsp, 64);
 525       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 526       __ subptr(rsp, 64);
 527       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 528       __ subptr(rsp, 64);
 529       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 530 #endif // _WINDOWS
 531 
 532       // load value into all 64 bytes of zmm7 register
 533       __ movl(rcx, VM_Version::ymm_test_value());
 534       __ movdl(xmm0, rcx);
 535       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 536       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 537       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 538       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 539       VM_Version::clean_cpuFeatures();
 540       __ jmp(save_restore_except);
 541     }
 542 
 543     __ bind(legacy_setup);
 544     // AVX setup
 545     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 546     UseAVX = 1;
 547     UseSSE = 2;
 548 #ifdef _WINDOWS
 549     __ subptr(rsp, 32);
 550     __ vmovdqu(Address(rsp, 0), xmm7);
 551     __ subptr(rsp, 32);
 552     __ vmovdqu(Address(rsp, 0), xmm8);
 553     __ subptr(rsp, 32);
 554     __ vmovdqu(Address(rsp, 0), xmm15);
 555 #endif // _WINDOWS
 556 
 557     // load value into all 32 bytes of ymm7 register
 558     __ movl(rcx, VM_Version::ymm_test_value());
 559 
 560     __ movdl(xmm0, rcx);
 561     __ pshufd(xmm0, xmm0, 0x00);
 562     __ vinsertf128_high(xmm0, xmm0);
 563     __ vmovdqu(xmm7, xmm0);
 564     __ vmovdqu(xmm8, xmm0);
 565     __ vmovdqu(xmm15, xmm0);
 566     VM_Version::clean_cpuFeatures();
 567 
 568     __ bind(save_restore_except);
 569     __ xorl(rsi, rsi);
 570     VM_Version::set_cpuinfo_segv_addr(__ pc());
 571     // Generate SEGV
 572     __ movl(rax, Address(rsi, 0));
 573 
 574     VM_Version::set_cpuinfo_cont_addr(__ pc());
 575     // Returns here after signal. Save xmm0 to check it later.
 576 
 577     // If UseAVX is uninitialized or is set by the user to include EVEX
 578     if (use_evex) {
 579       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 580       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 581       __ movl(rax, 0x10000);
 582       __ andl(rax, Address(rsi, 4));
 583       __ jcc(Assembler::equal, legacy_save_restore);
 584       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 585       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 586       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 587       __ movl(rax, 0xE0);
 588       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 589       __ cmpl(rax, 0xE0);
 590       __ jcc(Assembler::notEqual, legacy_save_restore);
 591 
 592       if (FLAG_IS_DEFAULT(UseAVX)) {
 593         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 594         __ movl(rax, Address(rsi, 0));
 595         __ cmpl(rax, 0x50654);              // If it is Skylake
 596         __ jcc(Assembler::equal, legacy_save_restore);
 597       }
 598       // EVEX check: run in lowest evex mode
 599       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 600       UseAVX = 3;
 601       UseSSE = 2;
 602       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 603       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 604       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 605       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 606       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 607 
 608 #ifdef _WINDOWS
 609       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 610       __ addptr(rsp, 64);
 611       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 612       __ addptr(rsp, 64);
 613       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 614       __ addptr(rsp, 64);
 615 #endif // _WINDOWS
 616       generate_vzeroupper(wrapup);
 617       VM_Version::clean_cpuFeatures();
 618       UseAVX = saved_useavx;
 619       UseSSE = saved_usesse;
 620       __ jmp(wrapup);
 621    }
 622 
 623     __ bind(legacy_save_restore);
 624     // AVX check
 625     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 626     UseAVX = 1;
 627     UseSSE = 2;
 628     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 629     __ vmovdqu(Address(rsi, 0), xmm0);
 630     __ vmovdqu(Address(rsi, 32), xmm7);
 631     __ vmovdqu(Address(rsi, 64), xmm8);
 632     __ vmovdqu(Address(rsi, 96), xmm15);
 633 
 634 #ifdef _WINDOWS
 635     __ vmovdqu(xmm15, Address(rsp, 0));
 636     __ addptr(rsp, 32);
 637     __ vmovdqu(xmm8, Address(rsp, 0));
 638     __ addptr(rsp, 32);
 639     __ vmovdqu(xmm7, Address(rsp, 0));
 640     __ addptr(rsp, 32);
 641 #endif // _WINDOWS
 642 
 643     generate_vzeroupper(wrapup);
 644     VM_Version::clean_cpuFeatures();
 645     UseAVX = saved_useavx;
 646     UseSSE = saved_usesse;
 647 
 648     __ bind(wrapup);
 649     __ popf();
 650     __ pop(rsi);
 651     __ pop(rbx);
 652     __ pop(rbp);
 653     __ ret(0);
 654 
 655 #   undef __
 656 
 657     return start;
 658   };
 659   void generate_vzeroupper(Label& L_wrapup) {
 660 #   define __ _masm->
 661     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 662     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 663     __ jcc(Assembler::notEqual, L_wrapup);
 664     __ movl(rcx, 0x0FFF0FF0);
 665     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 666     __ andl(rcx, Address(rsi, 0));
 667     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 668     __ jcc(Assembler::equal, L_wrapup);
 669     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 670     __ jcc(Assembler::equal, L_wrapup);
 671     // vzeroupper() will use a pre-computed instruction sequence that we
 672     // can't compute until after we've determined CPU capabilities. Use
 673     // uncached variant here directly to be able to bootstrap correctly
 674     __ vzeroupper_uncached();
 675 #   undef __
 676   }
 677   address generate_detect_virt() {
 678     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 679 #   define __ _masm->
 680 
 681     address start = __ pc();
 682 
 683     // Evacuate callee-saved registers
 684     __ push(rbp);
 685     __ push(rbx);
 686     __ push(rsi); // for Windows
 687 
 688     __ mov(rax, c_rarg0); // CPUID leaf
 689     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 690 
 691     __ cpuid();
 692 
 693     // Store result to register array
 694     __ movl(Address(rsi,  0), rax);
 695     __ movl(Address(rsi,  4), rbx);
 696     __ movl(Address(rsi,  8), rcx);
 697     __ movl(Address(rsi, 12), rdx);
 698 
 699     // Epilogue
 700     __ pop(rsi);
 701     __ pop(rbx);
 702     __ pop(rbp);
 703     __ ret(0);
 704 
 705 #   undef __
 706 
 707     return start;
 708   };
 709 
 710 
 711   address generate_getCPUIDBrandString(void) {
 712     // Flags to test CPU type.
 713     const uint32_t HS_EFL_AC           = 0x40000;
 714     const uint32_t HS_EFL_ID           = 0x200000;
 715     // Values for when we don't have a CPUID instruction.
 716     const int      CPU_FAMILY_SHIFT = 8;
 717     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 718     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 719 
 720     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 721 
 722     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 723 #   define __ _masm->
 724 
 725     address start = __ pc();
 726 
 727     //
 728     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 729     //
 730     // rcx and rdx are first and second argument registers on windows
 731 
 732     __ push(rbp);
 733     __ mov(rbp, c_rarg0); // cpuid_info address
 734     __ push(rbx);
 735     __ push(rsi);
 736     __ pushf();          // preserve rbx, and flags
 737     __ pop(rax);
 738     __ push(rax);
 739     __ mov(rcx, rax);
 740     //
 741     // if we are unable to change the AC flag, we have a 386
 742     //
 743     __ xorl(rax, HS_EFL_AC);
 744     __ push(rax);
 745     __ popf();
 746     __ pushf();
 747     __ pop(rax);
 748     __ cmpptr(rax, rcx);
 749     __ jccb(Assembler::notEqual, detect_486);
 750 
 751     __ movl(rax, CPU_FAMILY_386);
 752     __ jmp(done);
 753 
 754     //
 755     // If we are unable to change the ID flag, we have a 486 which does
 756     // not support the "cpuid" instruction.
 757     //
 758     __ bind(detect_486);
 759     __ mov(rax, rcx);
 760     __ xorl(rax, HS_EFL_ID);
 761     __ push(rax);
 762     __ popf();
 763     __ pushf();
 764     __ pop(rax);
 765     __ cmpptr(rcx, rax);
 766     __ jccb(Assembler::notEqual, detect_586);
 767 
 768     __ bind(cpu486);
 769     __ movl(rax, CPU_FAMILY_486);
 770     __ jmp(done);
 771 
 772     //
 773     // At this point, we have a chip which supports the "cpuid" instruction
 774     //
 775     __ bind(detect_586);
 776     __ xorl(rax, rax);
 777     __ cpuid();
 778     __ orl(rax, rax);
 779     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 780                                         // value of at least 1, we give up and
 781                                         // assume a 486
 782 
 783     //
 784     // Extended cpuid(0x80000000) for processor brand string detection
 785     //
 786     __ bind(ext_cpuid);
 787     __ movl(rax, CPUID_EXTENDED_FN);
 788     __ cpuid();
 789     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 790     __ jcc(Assembler::below, done);
 791 
 792     //
 793     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 794     //
 795     __ movl(rax, CPUID_EXTENDED_FN_2);
 796     __ cpuid();
 797     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 798     __ movl(Address(rsi, 0), rax);
 799     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 800     __ movl(Address(rsi, 0), rbx);
 801     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 802     __ movl(Address(rsi, 0), rcx);
 803     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 804     __ movl(Address(rsi,0), rdx);
 805 
 806     //
 807     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 808     //
 809     __ movl(rax, CPUID_EXTENDED_FN_3);
 810     __ cpuid();
 811     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 812     __ movl(Address(rsi, 0), rax);
 813     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 814     __ movl(Address(rsi, 0), rbx);
 815     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 816     __ movl(Address(rsi, 0), rcx);
 817     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 818     __ movl(Address(rsi,0), rdx);
 819 
 820     //
 821     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 822     //
 823     __ movl(rax, CPUID_EXTENDED_FN_4);
 824     __ cpuid();
 825     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 826     __ movl(Address(rsi, 0), rax);
 827     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 828     __ movl(Address(rsi, 0), rbx);
 829     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 830     __ movl(Address(rsi, 0), rcx);
 831     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 832     __ movl(Address(rsi,0), rdx);
 833 
 834     //
 835     // return
 836     //
 837     __ bind(done);
 838     __ popf();
 839     __ pop(rsi);
 840     __ pop(rbx);
 841     __ pop(rbp);
 842     __ ret(0);
 843 
 844 #   undef __
 845 
 846     return start;
 847   };
 848 };
 849 
 850 void VM_Version::get_processor_features() {
 851 
 852   _cpu = 4; // 486 by default
 853   _model = 0;
 854   _stepping = 0;
 855   _logical_processors_per_package = 1;
 856   // i486 internal cache is both I&D and has a 16-byte line size
 857   _L1_data_cache_line_size = 16;
 858 
 859   // Get raw processor info
 860 
 861   get_cpu_info_stub(&_cpuid_info);
 862 
 863   assert_is_initialized();
 864   _cpu = extended_cpu_family();
 865   _model = extended_cpu_model();
 866   _stepping = cpu_stepping();
 867 
 868   if (cpu_family() > 4) { // it supports CPUID
 869     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 870     _cpu_features = _features; // Preserve features
 871     // Logical processors are only available on P4s and above,
 872     // and only if hyperthreading is available.
 873     _logical_processors_per_package = logical_processor_count();
 874     _L1_data_cache_line_size = L1_line_size();
 875   }
 876 
 877   // xchg and xadd instructions
 878   _supports_atomic_getset4 = true;
 879   _supports_atomic_getadd4 = true;
 880   _supports_atomic_getset8 = true;
 881   _supports_atomic_getadd8 = true;
 882 
 883   // OS should support SSE for x64 and hardware should support at least SSE2.
 884   if (!VM_Version::supports_sse2()) {
 885     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 886   }
 887   // in 64 bit the use of SSE2 is the minimum
 888   if (UseSSE < 2) UseSSE = 2;
 889 
 890   // flush_icache_stub have to be generated first.
 891   // That is why Icache line size is hard coded in ICache class,
 892   // see icache_x86.hpp. It is also the reason why we can't use
 893   // clflush instruction in 32-bit VM since it could be running
 894   // on CPU which does not support it.
 895   //
 896   // The only thing we can do is to verify that flushed
 897   // ICache::line_size has correct value.
 898   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 899   // clflush_size is size in quadwords (8 bytes).
 900   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 901 
 902   // assigning this field effectively enables Unsafe.writebackMemory()
 903   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 904   // that is only implemented on x86_64 and only if the OS plays ball
 905   if (os::supports_map_sync()) {
 906     // publish data cache line flush size to generic field, otherwise
 907     // let if default to zero thereby disabling writeback
 908     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 909   }
 910 
 911   // Check if processor has Intel Ecore
 912   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
 913     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 914       _model == 0xCC || _model == 0xDD)) {
 915     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 916   }
 917 
 918   if (UseSSE < 4) {
 919     _features.clear_feature(CPU_SSE4_1);
 920     _features.clear_feature(CPU_SSE4_2);
 921   }
 922 
 923   if (UseSSE < 3) {
 924     _features.clear_feature(CPU_SSE3);
 925     _features.clear_feature(CPU_SSSE3);
 926     _features.clear_feature(CPU_SSE4A);
 927   }
 928 
 929   if (UseSSE < 2)
 930     _features.clear_feature(CPU_SSE2);
 931 
 932   if (UseSSE < 1)
 933     _features.clear_feature(CPU_SSE);
 934 
 935   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 936   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 937     UseAVX = 0;
 938   }
 939 
 940   // UseSSE is set to the smaller of what hardware supports and what
 941   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 942   // older Pentiums which do not support it.
 943   int use_sse_limit = 0;
 944   if (UseSSE > 0) {
 945     if (UseSSE > 3 && supports_sse4_1()) {
 946       use_sse_limit = 4;
 947     } else if (UseSSE > 2 && supports_sse3()) {
 948       use_sse_limit = 3;
 949     } else if (UseSSE > 1 && supports_sse2()) {
 950       use_sse_limit = 2;
 951     } else if (UseSSE > 0 && supports_sse()) {
 952       use_sse_limit = 1;
 953     } else {
 954       use_sse_limit = 0;
 955     }
 956   }
 957   if (FLAG_IS_DEFAULT(UseSSE)) {
 958     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 959   } else if (UseSSE > use_sse_limit) {
 960     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 961     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 962   }
 963 
 964   // first try initial setting and detect what we can support
 965   int use_avx_limit = 0;
 966   if (UseAVX > 0) {
 967     if (UseSSE < 4) {
 968       // Don't use AVX if SSE is unavailable or has been disabled.
 969       use_avx_limit = 0;
 970     } else if (UseAVX > 2 && supports_evex()) {
 971       use_avx_limit = 3;
 972     } else if (UseAVX > 1 && supports_avx2()) {
 973       use_avx_limit = 2;
 974     } else if (UseAVX > 0 && supports_avx()) {
 975       use_avx_limit = 1;
 976     } else {
 977       use_avx_limit = 0;
 978     }
 979   }
 980   if (FLAG_IS_DEFAULT(UseAVX)) {
 981     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 982     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 983       FLAG_SET_DEFAULT(UseAVX, 2);
 984     } else {
 985       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 986     }
 987   }
 988 
 989   if (UseAVX > use_avx_limit) {
 990     if (UseSSE < 4) {
 991       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 992     } else {
 993       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 994     }
 995     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 996   }
 997 
 998   if (UseAVX < 3) {
 999     _features.clear_feature(CPU_AVX512F);
1000     _features.clear_feature(CPU_AVX512DQ);
1001     _features.clear_feature(CPU_AVX512CD);
1002     _features.clear_feature(CPU_AVX512BW);
1003     _features.clear_feature(CPU_AVX512ER);
1004     _features.clear_feature(CPU_AVX512PF);
1005     _features.clear_feature(CPU_AVX512VL);
1006     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1007     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1008     _features.clear_feature(CPU_AVX512_VAES);
1009     _features.clear_feature(CPU_AVX512_VNNI);
1010     _features.clear_feature(CPU_AVX512_VBMI);
1011     _features.clear_feature(CPU_AVX512_VBMI2);
1012     _features.clear_feature(CPU_AVX512_BITALG);
1013     _features.clear_feature(CPU_AVX512_IFMA);
1014     _features.clear_feature(CPU_APX_F);
1015     _features.clear_feature(CPU_AVX512_FP16);
1016     _features.clear_feature(CPU_AVX10_1);
1017     _features.clear_feature(CPU_AVX10_2);
1018   }
1019 
1020   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1021   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1022   if (UseAPX && !apx_supported) {
1023     warning("UseAPX is not supported on this CPU, setting it to false");
1024     FLAG_SET_DEFAULT(UseAPX, false);
1025   } else if (FLAG_IS_DEFAULT(UseAPX)) {
1026     FLAG_SET_DEFAULT(UseAPX, apx_supported ? true : false);
1027   }
1028 
1029   if (!UseAPX) {
1030     _features.clear_feature(CPU_APX_F);
1031   }
1032 
1033   if (UseAVX < 2) {
1034     _features.clear_feature(CPU_AVX2);
1035     _features.clear_feature(CPU_AVX_IFMA);
1036   }
1037 
1038   if (UseAVX < 1) {
1039     _features.clear_feature(CPU_AVX);
1040     _features.clear_feature(CPU_VZEROUPPER);
1041     _features.clear_feature(CPU_F16C);
1042     _features.clear_feature(CPU_SHA512);
1043   }
1044 
1045   if (logical_processors_per_package() == 1) {
1046     // HT processor could be installed on a system which doesn't support HT.
1047     _features.clear_feature(CPU_HT);
1048   }
1049 
1050   if (is_intel()) { // Intel cpus specific settings
1051     if (is_knights_family()) {
1052       _features.clear_feature(CPU_VZEROUPPER);
1053       _features.clear_feature(CPU_AVX512BW);
1054       _features.clear_feature(CPU_AVX512VL);
1055       _features.clear_feature(CPU_AVX512DQ);
1056       _features.clear_feature(CPU_AVX512_VNNI);
1057       _features.clear_feature(CPU_AVX512_VAES);
1058       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1059       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1060       _features.clear_feature(CPU_AVX512_VBMI);
1061       _features.clear_feature(CPU_AVX512_VBMI2);
1062       _features.clear_feature(CPU_CLWB);
1063       _features.clear_feature(CPU_FLUSHOPT);
1064       _features.clear_feature(CPU_GFNI);
1065       _features.clear_feature(CPU_AVX512_BITALG);
1066       _features.clear_feature(CPU_AVX512_IFMA);
1067       _features.clear_feature(CPU_AVX_IFMA);
1068       _features.clear_feature(CPU_AVX512_FP16);
1069       _features.clear_feature(CPU_AVX10_1);
1070       _features.clear_feature(CPU_AVX10_2);
1071     }
1072   }
1073 
1074   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1075     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1076   } else {
1077     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1078   }
1079 
1080   assert(supports_clflush(), "Always present");
1081   if (X86ICacheSync == -1) {
1082     // Auto-detect, choosing the best performant one that still flushes
1083     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1084     if (supports_clwb()) {
1085       FLAG_SET_ERGO(X86ICacheSync, 3);
1086     } else if (supports_clflushopt()) {
1087       FLAG_SET_ERGO(X86ICacheSync, 2);
1088     } else {
1089       FLAG_SET_ERGO(X86ICacheSync, 1);
1090     }
1091   } else {
1092     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1093       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1094     }
1095     if ((X86ICacheSync == 3) && !supports_clwb()) {
1096       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1097     }
1098     if ((X86ICacheSync == 5) && !supports_serialize()) {
1099       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1100     }
1101   }
1102 
1103   char buf[2048];
1104   size_t cpu_info_size = jio_snprintf(
1105               buf, sizeof(buf),
1106               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
1107               cores_per_cpu(), threads_per_core(),
1108               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1109   assert(cpu_info_size > 0, "not enough temporary space allocated");
1110 
1111   insert_features_names(_features, buf + cpu_info_size, sizeof(buf) - cpu_info_size);
1112 
1113   _cpu_info_string = os::strdup(buf);
1114 
1115   _features_string = extract_features_string(_cpu_info_string,
1116                                              strnlen(_cpu_info_string, sizeof(buf)),
1117                                              cpu_info_size);
1118 
1119   // Use AES instructions if available.
1120   if (supports_aes()) {
1121     if (FLAG_IS_DEFAULT(UseAES)) {
1122       FLAG_SET_DEFAULT(UseAES, true);
1123     }
1124     if (!UseAES) {
1125       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1126         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1127       }
1128       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1129     } else {
1130       if (UseSSE > 2) {
1131         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1132           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1133         }
1134       } else {
1135         // The AES intrinsic stubs require AES instruction support (of course)
1136         // but also require sse3 mode or higher for instructions it use.
1137         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1138           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1139         }
1140         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1141       }
1142 
1143       // --AES-CTR begins--
1144       if (!UseAESIntrinsics) {
1145         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1146           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1147           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1148         }
1149       } else {
1150         if (supports_sse4_1()) {
1151           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1152             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1153           }
1154         } else {
1155            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1156            // but also require sse4.1 mode or higher for instructions it use.
1157           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1158              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1159            }
1160            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1161         }
1162       }
1163       // --AES-CTR ends--
1164     }
1165   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1166     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1167       warning("AES instructions are not available on this CPU");
1168       FLAG_SET_DEFAULT(UseAES, false);
1169     }
1170     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1171       warning("AES intrinsics are not available on this CPU");
1172       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1173     }
1174     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1175       warning("AES-CTR intrinsics are not available on this CPU");
1176       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1177     }
1178   }
1179 
1180   // Use CLMUL instructions if available.
1181   if (supports_clmul()) {
1182     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1183       UseCLMUL = true;
1184     }
1185   } else if (UseCLMUL) {
1186     if (!FLAG_IS_DEFAULT(UseCLMUL))
1187       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1188     FLAG_SET_DEFAULT(UseCLMUL, false);
1189   }
1190 
1191   if (UseCLMUL && (UseSSE > 2)) {
1192     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1193       UseCRC32Intrinsics = true;
1194     }
1195   } else if (UseCRC32Intrinsics) {
1196     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1197       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1198     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1199   }
1200 
1201   if (supports_avx2()) {
1202     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1203       UseAdler32Intrinsics = true;
1204     }
1205   } else if (UseAdler32Intrinsics) {
1206     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1207       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1208     }
1209     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1210   }
1211 
1212   if (supports_sse4_2() && supports_clmul()) {
1213     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1214       UseCRC32CIntrinsics = true;
1215     }
1216   } else if (UseCRC32CIntrinsics) {
1217     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1218       warning("CRC32C intrinsics are not available on this CPU");
1219     }
1220     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1221   }
1222 
1223   // GHASH/GCM intrinsics
1224   if (UseCLMUL && (UseSSE > 2)) {
1225     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1226       UseGHASHIntrinsics = true;
1227     }
1228   } else if (UseGHASHIntrinsics) {
1229     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1230       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1231     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1232   }
1233 
1234   // ChaCha20 Intrinsics
1235   // As long as the system supports AVX as a baseline we can do a
1236   // SIMD-enabled block function.  StubGenerator makes the determination
1237   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1238   // version.
1239   if (UseAVX >= 1) {
1240       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1241           UseChaCha20Intrinsics = true;
1242       }
1243   } else if (UseChaCha20Intrinsics) {
1244       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1245           warning("ChaCha20 intrinsic requires AVX instructions");
1246       }
1247       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1248   }
1249 
1250   // Kyber Intrinsics
1251   // Currently we only have them for AVX512
1252 #ifdef _LP64
1253   if (supports_evex() && supports_avx512bw()) {
1254       if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1255           UseKyberIntrinsics = true;
1256       }
1257   } else
1258 #endif
1259   if (UseKyberIntrinsics) {
1260      warning("Intrinsics for ML-KEM are not available on this CPU.");
1261      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1262   }
1263 
1264   // Dilithium Intrinsics
1265   // Currently we only have them for AVX512
1266   if (supports_evex() && supports_avx512bw()) {
1267       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1268           UseDilithiumIntrinsics = true;
1269       }
1270   } else if (UseDilithiumIntrinsics) {
1271       warning("Intrinsics for ML-DSA are not available on this CPU.");
1272       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1273   }
1274 
1275   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1276   if (UseAVX >= 2) {
1277     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1278       UseBASE64Intrinsics = true;
1279     }
1280   } else if (UseBASE64Intrinsics) {
1281      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1282       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1283     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1284   }
1285 
1286   if (supports_fma()) {
1287     if (FLAG_IS_DEFAULT(UseFMA)) {
1288       UseFMA = true;
1289     }
1290   } else if (UseFMA) {
1291     warning("FMA instructions are not available on this CPU");
1292     FLAG_SET_DEFAULT(UseFMA, false);
1293   }
1294 
1295   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1296     UseMD5Intrinsics = true;
1297   }
1298 
1299   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1300     if (FLAG_IS_DEFAULT(UseSHA)) {
1301       UseSHA = true;
1302     }
1303   } else if (UseSHA) {
1304     warning("SHA instructions are not available on this CPU");
1305     FLAG_SET_DEFAULT(UseSHA, false);
1306   }
1307 
1308   if (supports_sha() && supports_sse4_1() && UseSHA) {
1309     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1310       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1311     }
1312   } else if (UseSHA1Intrinsics) {
1313     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1314     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1315   }
1316 
1317   if (supports_sse4_1() && UseSHA) {
1318     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1319       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1320     }
1321   } else if (UseSHA256Intrinsics) {
1322     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1323     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1324   }
1325 
1326   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1327     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1328       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1329     }
1330   } else if (UseSHA512Intrinsics) {
1331     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1332     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1333   }
1334 
1335   if (supports_evex() && supports_avx512bw()) {
1336       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1337           UseSHA3Intrinsics = true;
1338       }
1339   } else if (UseSHA3Intrinsics) {
1340       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1341       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1342   }
1343 
1344   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1345     FLAG_SET_DEFAULT(UseSHA, false);
1346   }
1347 
1348 #if COMPILER2_OR_JVMCI
1349   int max_vector_size = 0;
1350   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1351     // 16 byte vectors (in XMM) are supported with SSE2+
1352     max_vector_size = 16;
1353   } else if (UseAVX == 1 || UseAVX == 2) {
1354     // 32 bytes vectors (in YMM) are only supported with AVX+
1355     max_vector_size = 32;
1356   } else if (UseAVX > 2) {
1357     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1358     max_vector_size = 64;
1359   }
1360 
1361   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1362 
1363   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1364     if (MaxVectorSize < min_vector_size) {
1365       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1366       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1367     }
1368     if (MaxVectorSize > max_vector_size) {
1369       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1370       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1371     }
1372     if (!is_power_of_2(MaxVectorSize)) {
1373       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1374       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1375     }
1376   } else {
1377     // If default, use highest supported configuration
1378     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1379   }
1380 
1381 #if defined(COMPILER2) && defined(ASSERT)
1382   if (MaxVectorSize > 0) {
1383     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1384       tty->print_cr("State of YMM registers after signal handle:");
1385       int nreg = 4;
1386       const char* ymm_name[4] = {"0", "7", "8", "15"};
1387       for (int i = 0; i < nreg; i++) {
1388         tty->print("YMM%s:", ymm_name[i]);
1389         for (int j = 7; j >=0; j--) {
1390           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1391         }
1392         tty->cr();
1393       }
1394     }
1395   }
1396 #endif // COMPILER2 && ASSERT
1397 
1398   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1399     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1400       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1401     }
1402   } else if (UsePoly1305Intrinsics) {
1403     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1404     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1405   }
1406 
1407   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1408     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1409       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1410     }
1411   } else if (UseIntPolyIntrinsics) {
1412     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1413     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1414   }
1415 
1416   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1417     UseMultiplyToLenIntrinsic = true;
1418   }
1419   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1420     UseSquareToLenIntrinsic = true;
1421   }
1422   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1423     UseMulAddIntrinsic = true;
1424   }
1425   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1426     UseMontgomeryMultiplyIntrinsic = true;
1427   }
1428   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1429     UseMontgomerySquareIntrinsic = true;
1430   }
1431 #endif // COMPILER2_OR_JVMCI
1432 
1433   // On new cpus instructions which update whole XMM register should be used
1434   // to prevent partial register stall due to dependencies on high half.
1435   //
1436   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1437   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1438   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1439   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1440 
1441 
1442   if (is_zx()) { // ZX cpus specific settings
1443     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1444       UseStoreImmI16 = false; // don't use it on ZX cpus
1445     }
1446     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1447       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1448         // Use it on all ZX cpus
1449         UseAddressNop = true;
1450       }
1451     }
1452     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1453       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1454     }
1455     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1456       if (supports_sse3()) {
1457         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1458       } else {
1459         UseXmmRegToRegMoveAll = false;
1460       }
1461     }
1462     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1463 #ifdef COMPILER2
1464       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1465         // For new ZX cpus do the next optimization:
1466         // don't align the beginning of a loop if there are enough instructions
1467         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1468         // in current fetch line (OptoLoopAlignment) or the padding
1469         // is big (> MaxLoopPad).
1470         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1471         // generated NOP instructions. 11 is the largest size of one
1472         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1473         MaxLoopPad = 11;
1474       }
1475 #endif // COMPILER2
1476       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1477         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1478       }
1479       if (supports_sse4_2()) { // new ZX cpus
1480         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1481           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1482         }
1483       }
1484       if (supports_sse4_2()) {
1485         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1486           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1487         }
1488       } else {
1489         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1490           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1491         }
1492         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1493       }
1494     }
1495 
1496     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1497       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1498     }
1499   }
1500 
1501   if (is_amd_family()) { // AMD cpus specific settings
1502     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1503       // Use it on new AMD cpus starting from Opteron.
1504       UseAddressNop = true;
1505     }
1506     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1507       // Use it on new AMD cpus starting from Opteron.
1508       UseNewLongLShift = true;
1509     }
1510     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1511       if (supports_sse4a()) {
1512         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1513       } else {
1514         UseXmmLoadAndClearUpper = false;
1515       }
1516     }
1517     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1518       if (supports_sse4a()) {
1519         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1520       } else {
1521         UseXmmRegToRegMoveAll = false;
1522       }
1523     }
1524     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1525       if (supports_sse4a()) {
1526         UseXmmI2F = true;
1527       } else {
1528         UseXmmI2F = false;
1529       }
1530     }
1531     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1532       if (supports_sse4a()) {
1533         UseXmmI2D = true;
1534       } else {
1535         UseXmmI2D = false;
1536       }
1537     }
1538     if (supports_sse4_2()) {
1539       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1540         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1541       }
1542     } else {
1543       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1544         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1545       }
1546       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1547     }
1548 
1549     // some defaults for AMD family 15h
1550     if (cpu_family() == 0x15) {
1551       // On family 15h processors default is no sw prefetch
1552       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1553         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1554       }
1555       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1556       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1557         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1558       }
1559       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1560       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1561         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1562       }
1563       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1564         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1565       }
1566     }
1567 
1568 #ifdef COMPILER2
1569     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1570       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1571       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1572     }
1573 #endif // COMPILER2
1574 
1575     // Some defaults for AMD family >= 17h && Hygon family 18h
1576     if (cpu_family() >= 0x17) {
1577       // On family >=17h processors use XMM and UnalignedLoadStores
1578       // for Array Copy
1579       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1580         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1581       }
1582       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1583         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1584       }
1585 #ifdef COMPILER2
1586       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1587         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1588       }
1589 #endif
1590     }
1591   }
1592 
1593   if (is_intel()) { // Intel cpus specific settings
1594     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1595       UseStoreImmI16 = false; // don't use it on Intel cpus
1596     }
1597     if (cpu_family() == 6 || cpu_family() == 15) {
1598       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1599         // Use it on all Intel cpus starting from PentiumPro
1600         UseAddressNop = true;
1601       }
1602     }
1603     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1604       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1605     }
1606     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1607       if (supports_sse3()) {
1608         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1609       } else {
1610         UseXmmRegToRegMoveAll = false;
1611       }
1612     }
1613     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1614 #ifdef COMPILER2
1615       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1616         // For new Intel cpus do the next optimization:
1617         // don't align the beginning of a loop if there are enough instructions
1618         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1619         // in current fetch line (OptoLoopAlignment) or the padding
1620         // is big (> MaxLoopPad).
1621         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1622         // generated NOP instructions. 11 is the largest size of one
1623         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1624         MaxLoopPad = 11;
1625       }
1626 #endif // COMPILER2
1627 
1628       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1629         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1630       }
1631       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1632         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1633           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1634         }
1635       }
1636       if (supports_sse4_2()) {
1637         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1638           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1639         }
1640       } else {
1641         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1642           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1643         }
1644         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1645       }
1646     }
1647     if (is_atom_family() || is_knights_family()) {
1648 #ifdef COMPILER2
1649       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1650         OptoScheduling = true;
1651       }
1652 #endif
1653       if (supports_sse4_2()) { // Silvermont
1654         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1655           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1656         }
1657       }
1658       if (FLAG_IS_DEFAULT(UseIncDec)) {
1659         FLAG_SET_DEFAULT(UseIncDec, false);
1660       }
1661     }
1662     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1663       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1664     }
1665 #ifdef COMPILER2
1666     if (UseAVX > 2) {
1667       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1668           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1669            ArrayOperationPartialInlineSize != 0 &&
1670            ArrayOperationPartialInlineSize != 16 &&
1671            ArrayOperationPartialInlineSize != 32 &&
1672            ArrayOperationPartialInlineSize != 64)) {
1673         int inline_size = 0;
1674         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1675           inline_size = 64;
1676         } else if (MaxVectorSize >= 32) {
1677           inline_size = 32;
1678         } else if (MaxVectorSize >= 16) {
1679           inline_size = 16;
1680         }
1681         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1682           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1683         }
1684         ArrayOperationPartialInlineSize = inline_size;
1685       }
1686 
1687       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1688         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1689         if (ArrayOperationPartialInlineSize) {
1690           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1691         } else {
1692           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1693         }
1694       }
1695     }
1696 #endif
1697   }
1698 
1699 #ifdef COMPILER2
1700   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1701     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1702       OptimizeFill = false;
1703     }
1704   }
1705 #endif
1706 
1707   if (UseSSE42Intrinsics) {
1708     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1709       UseVectorizedMismatchIntrinsic = true;
1710     }
1711   } else if (UseVectorizedMismatchIntrinsic) {
1712     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1713       warning("vectorizedMismatch intrinsics are not available on this CPU");
1714     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1715   }
1716   if (UseAVX >= 2) {
1717     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1718   } else if (UseVectorizedHashCodeIntrinsic) {
1719     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1720       warning("vectorizedHashCode intrinsics are not available on this CPU");
1721     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1722   }
1723 
1724   // Use count leading zeros count instruction if available.
1725   if (supports_lzcnt()) {
1726     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1727       UseCountLeadingZerosInstruction = true;
1728     }
1729    } else if (UseCountLeadingZerosInstruction) {
1730     warning("lzcnt instruction is not available on this CPU");
1731     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1732   }
1733 
1734   // Use count trailing zeros instruction if available
1735   if (supports_bmi1()) {
1736     // tzcnt does not require VEX prefix
1737     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1738       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1739         // Don't use tzcnt if BMI1 is switched off on command line.
1740         UseCountTrailingZerosInstruction = false;
1741       } else {
1742         UseCountTrailingZerosInstruction = true;
1743       }
1744     }
1745   } else if (UseCountTrailingZerosInstruction) {
1746     warning("tzcnt instruction is not available on this CPU");
1747     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1748   }
1749 
1750   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1751   // VEX prefix is generated only when AVX > 0.
1752   if (supports_bmi1() && supports_avx()) {
1753     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1754       UseBMI1Instructions = true;
1755     }
1756   } else if (UseBMI1Instructions) {
1757     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1758     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1759   }
1760 
1761   if (supports_bmi2() && supports_avx()) {
1762     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1763       UseBMI2Instructions = true;
1764     }
1765   } else if (UseBMI2Instructions) {
1766     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1767     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1768   }
1769 
1770   // Use population count instruction if available.
1771   if (supports_popcnt()) {
1772     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1773       UsePopCountInstruction = true;
1774     }
1775   } else if (UsePopCountInstruction) {
1776     warning("POPCNT instruction is not available on this CPU");
1777     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1778   }
1779 
1780   // Use fast-string operations if available.
1781   if (supports_erms()) {
1782     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1783       UseFastStosb = true;
1784     }
1785   } else if (UseFastStosb) {
1786     warning("fast-string operations are not available on this CPU");
1787     FLAG_SET_DEFAULT(UseFastStosb, false);
1788   }
1789 
1790   // For AMD Processors use XMM/YMM MOVDQU instructions
1791   // for Object Initialization as default
1792   if (is_amd() && cpu_family() >= 0x19) {
1793     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1794       UseFastStosb = false;
1795     }
1796   }
1797 
1798 #ifdef COMPILER2
1799   if (is_intel() && MaxVectorSize > 16) {
1800     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1801       UseFastStosb = false;
1802     }
1803   }
1804 #endif
1805 
1806   // Use XMM/YMM MOVDQU instruction for Object Initialization
1807   if (!UseFastStosb && UseUnalignedLoadStores) {
1808     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1809       UseXMMForObjInit = true;
1810     }
1811   } else if (UseXMMForObjInit) {
1812     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1813     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1814   }
1815 
1816 #ifdef COMPILER2
1817   if (FLAG_IS_DEFAULT(AlignVector)) {
1818     // Modern processors allow misaligned memory operations for vectors.
1819     AlignVector = !UseUnalignedLoadStores;
1820   }
1821 #endif // COMPILER2
1822 
1823   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1824     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1825       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1826     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1827       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1828     }
1829   }
1830 
1831   // Allocation prefetch settings
1832   int cache_line_size = checked_cast<int>(prefetch_data_size());
1833   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1834       (cache_line_size > AllocatePrefetchStepSize)) {
1835     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1836   }
1837 
1838   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1839     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1840     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1841       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1842     }
1843     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1844   }
1845 
1846   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1847     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1848     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1849   }
1850 
1851   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1852     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1853         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1854       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1855     }
1856 #ifdef COMPILER2
1857     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1858       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1859     }
1860 #endif
1861   }
1862 
1863   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1864 #ifdef COMPILER2
1865     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1866       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1867     }
1868 #endif
1869   }
1870 
1871   // Prefetch settings
1872 
1873   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1874   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1875   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1876   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1877 
1878   // gc copy/scan is disabled if prefetchw isn't supported, because
1879   // Prefetch::write emits an inlined prefetchw on Linux.
1880   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1881   // The used prefetcht0 instruction works for both amd64 and em64t.
1882 
1883   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1884     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1885   }
1886   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1887     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1888   }
1889 
1890   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1891      (cache_line_size > ContendedPaddingWidth))
1892      ContendedPaddingWidth = cache_line_size;
1893 
1894   // This machine allows unaligned memory accesses
1895   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1896     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1897   }
1898 
1899 #ifndef PRODUCT
1900   if (log_is_enabled(Info, os, cpu)) {
1901     LogStream ls(Log(os, cpu)::info());
1902     outputStream* log = &ls;
1903     log->print_cr("Logical CPUs per core: %u",
1904                   logical_processors_per_package());
1905     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1906     log->print("UseSSE=%d", UseSSE);
1907     if (UseAVX > 0) {
1908       log->print("  UseAVX=%d", UseAVX);
1909     }
1910     if (UseAES) {
1911       log->print("  UseAES=1");
1912     }
1913 #ifdef COMPILER2
1914     if (MaxVectorSize > 0) {
1915       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1916     }
1917 #endif
1918     log->cr();
1919     log->print("Allocation");
1920     if (AllocatePrefetchStyle <= 0) {
1921       log->print_cr(": no prefetching");
1922     } else {
1923       log->print(" prefetching: ");
1924       if (AllocatePrefetchInstr == 0) {
1925         log->print("PREFETCHNTA");
1926       } else if (AllocatePrefetchInstr == 1) {
1927         log->print("PREFETCHT0");
1928       } else if (AllocatePrefetchInstr == 2) {
1929         log->print("PREFETCHT2");
1930       } else if (AllocatePrefetchInstr == 3) {
1931         log->print("PREFETCHW");
1932       }
1933       if (AllocatePrefetchLines > 1) {
1934         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1935       } else {
1936         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1937       }
1938     }
1939 
1940     if (PrefetchCopyIntervalInBytes > 0) {
1941       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1942     }
1943     if (PrefetchScanIntervalInBytes > 0) {
1944       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1945     }
1946     if (ContendedPaddingWidth > 0) {
1947       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1948     }
1949   }
1950 #endif // !PRODUCT
1951   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1952       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1953   }
1954   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1955       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1956   }
1957 }
1958 
1959 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1960   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1961   if (vrt == XenHVM) {
1962     st->print_cr("Xen hardware-assisted virtualization detected");
1963   } else if (vrt == KVM) {
1964     st->print_cr("KVM virtualization detected");
1965   } else if (vrt == VMWare) {
1966     st->print_cr("VMWare virtualization detected");
1967     VirtualizationSupport::print_virtualization_info(st);
1968   } else if (vrt == HyperV) {
1969     st->print_cr("Hyper-V virtualization detected");
1970   } else if (vrt == HyperVRole) {
1971     st->print_cr("Hyper-V role detected");
1972   }
1973 }
1974 
1975 bool VM_Version::compute_has_intel_jcc_erratum() {
1976   if (!is_intel_family_core()) {
1977     // Only Intel CPUs are affected.
1978     return false;
1979   }
1980   // The following table of affected CPUs is based on the following document released by Intel:
1981   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1982   switch (_model) {
1983   case 0x8E:
1984     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1985     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1986     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1987     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1988     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1989     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1990     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1991     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1992     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1993     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1994   case 0x4E:
1995     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1996     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1997     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1998     return _stepping == 0x3;
1999   case 0x55:
2000     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2001     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2002     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2003     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2004     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2005     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2006     return _stepping == 0x4 || _stepping == 0x7;
2007   case 0x5E:
2008     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2009     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2010     return _stepping == 0x3;
2011   case 0x9E:
2012     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2013     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2014     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2015     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2016     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2017     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2018     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2019     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2020     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2021     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2022     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2023     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2024     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2025     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2026     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2027   case 0xA5:
2028     // Not in Intel documentation.
2029     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2030     return true;
2031   case 0xA6:
2032     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2033     return _stepping == 0x0;
2034   case 0xAE:
2035     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2036     return _stepping == 0xA;
2037   default:
2038     // If we are running on another intel machine not recognized in the table, we are okay.
2039     return false;
2040   }
2041 }
2042 
2043 // On Xen, the cpuid instruction returns
2044 //  eax / registers[0]: Version of Xen
2045 //  ebx / registers[1]: chars 'XenV'
2046 //  ecx / registers[2]: chars 'MMXe'
2047 //  edx / registers[3]: chars 'nVMM'
2048 //
2049 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2050 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2051 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2052 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2053 //
2054 // more information :
2055 // https://kb.vmware.com/s/article/1009458
2056 //
2057 void VM_Version::check_virtualizations() {
2058   uint32_t registers[4] = {0};
2059   char signature[13] = {0};
2060 
2061   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2062   // from 0x40000000 until 0x40010000.
2063   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2064   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2065     detect_virt_stub(leaf, registers);
2066     memcpy(signature, &registers[1], 12);
2067 
2068     if (strncmp("VMwareVMware", signature, 12) == 0) {
2069       Abstract_VM_Version::_detected_virtualization = VMWare;
2070       // check for extended metrics from guestlib
2071       VirtualizationSupport::initialize();
2072     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2073       Abstract_VM_Version::_detected_virtualization = HyperV;
2074 #ifdef _WINDOWS
2075       // CPUID leaf 0x40000007 is available to the root partition only.
2076       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2077       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2078       detect_virt_stub(0x40000007, registers);
2079       if ((registers[0] != 0x0) ||
2080           (registers[1] != 0x0) ||
2081           (registers[2] != 0x0) ||
2082           (registers[3] != 0x0)) {
2083         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2084       }
2085 #endif
2086     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2087       Abstract_VM_Version::_detected_virtualization = KVM;
2088     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2089       Abstract_VM_Version::_detected_virtualization = XenHVM;
2090     }
2091   }
2092 }
2093 
2094 #ifdef COMPILER2
2095 // Determine if it's running on Cascade Lake using default options.
2096 bool VM_Version::is_default_intel_cascade_lake() {
2097   return FLAG_IS_DEFAULT(UseAVX) &&
2098          FLAG_IS_DEFAULT(MaxVectorSize) &&
2099          UseAVX > 2 &&
2100          is_intel_cascade_lake();
2101 }
2102 #endif
2103 
2104 bool VM_Version::is_intel_cascade_lake() {
2105   return is_intel_skylake() && _stepping >= 5;
2106 }
2107 
2108 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2109 // for implementing the array copy and clear operations.
2110 // The Intel platforms that supports the serialize instruction
2111 // has improved implementation of 64-byte load/stores and so the default
2112 // threshold is set to 0 for these platforms.
2113 int VM_Version::avx3_threshold() {
2114   return (is_intel_family_core() &&
2115           supports_serialize() &&
2116           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2117 }
2118 
2119 void VM_Version::clear_apx_test_state() {
2120   clear_apx_test_state_stub();
2121 }
2122 
2123 static bool _vm_version_initialized = false;
2124 
2125 void VM_Version::initialize() {
2126   ResourceMark rm;
2127 
2128   // Making this stub must be FIRST use of assembler
2129   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2130   if (stub_blob == nullptr) {
2131     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2132   }
2133   CodeBuffer c(stub_blob);
2134   VM_Version_StubGenerator g(&c);
2135 
2136   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2137                                      g.generate_get_cpu_info());
2138   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2139                                      g.generate_detect_virt());
2140   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2141                                      g.clear_apx_test_state());
2142   get_processor_features();
2143 
2144   Assembler::precompute_instructions();
2145 
2146   if (VM_Version::supports_hv()) { // Supports hypervisor
2147     check_virtualizations();
2148   }
2149   _vm_version_initialized = true;
2150 }
2151 
2152 typedef enum {
2153    CPU_FAMILY_8086_8088  = 0,
2154    CPU_FAMILY_INTEL_286  = 2,
2155    CPU_FAMILY_INTEL_386  = 3,
2156    CPU_FAMILY_INTEL_486  = 4,
2157    CPU_FAMILY_PENTIUM    = 5,
2158    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2159    CPU_FAMILY_PENTIUM_4  = 0xF
2160 } FamilyFlag;
2161 
2162 typedef enum {
2163   RDTSCP_FLAG  = 0x08000000, // bit 27
2164   INTEL64_FLAG = 0x20000000  // bit 29
2165 } _featureExtendedEdxFlag;
2166 
2167 typedef enum {
2168    FPU_FLAG     = 0x00000001,
2169    VME_FLAG     = 0x00000002,
2170    DE_FLAG      = 0x00000004,
2171    PSE_FLAG     = 0x00000008,
2172    TSC_FLAG     = 0x00000010,
2173    MSR_FLAG     = 0x00000020,
2174    PAE_FLAG     = 0x00000040,
2175    MCE_FLAG     = 0x00000080,
2176    CX8_FLAG     = 0x00000100,
2177    APIC_FLAG    = 0x00000200,
2178    SEP_FLAG     = 0x00000800,
2179    MTRR_FLAG    = 0x00001000,
2180    PGE_FLAG     = 0x00002000,
2181    MCA_FLAG     = 0x00004000,
2182    CMOV_FLAG    = 0x00008000,
2183    PAT_FLAG     = 0x00010000,
2184    PSE36_FLAG   = 0x00020000,
2185    PSNUM_FLAG   = 0x00040000,
2186    CLFLUSH_FLAG = 0x00080000,
2187    DTS_FLAG     = 0x00200000,
2188    ACPI_FLAG    = 0x00400000,
2189    MMX_FLAG     = 0x00800000,
2190    FXSR_FLAG    = 0x01000000,
2191    SSE_FLAG     = 0x02000000,
2192    SSE2_FLAG    = 0x04000000,
2193    SS_FLAG      = 0x08000000,
2194    HTT_FLAG     = 0x10000000,
2195    TM_FLAG      = 0x20000000
2196 } FeatureEdxFlag;
2197 
2198 static BufferBlob* cpuid_brand_string_stub_blob;
2199 static const int   cpuid_brand_string_stub_size = 550;
2200 
2201 extern "C" {
2202   typedef void (*getCPUIDBrandString_stub_t)(void*);
2203 }
2204 
2205 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2206 
2207 // VM_Version statics
2208 enum {
2209   ExtendedFamilyIdLength_INTEL = 16,
2210   ExtendedFamilyIdLength_AMD   = 24
2211 };
2212 
2213 const size_t VENDOR_LENGTH = 13;
2214 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2215 static char* _cpu_brand_string = nullptr;
2216 static int64_t _max_qualified_cpu_frequency = 0;
2217 
2218 static int _no_of_threads = 0;
2219 static int _no_of_cores = 0;
2220 
2221 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2222   "8086/8088",
2223   "",
2224   "286",
2225   "386",
2226   "486",
2227   "Pentium",
2228   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2229   "",
2230   "",
2231   "",
2232   "",
2233   "",
2234   "",
2235   "",
2236   "",
2237   "Pentium 4"
2238 };
2239 
2240 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2241   "",
2242   "",
2243   "",
2244   "",
2245   "5x86",
2246   "K5/K6",
2247   "Athlon/AthlonXP",
2248   "",
2249   "",
2250   "",
2251   "",
2252   "",
2253   "",
2254   "",
2255   "",
2256   "Opteron/Athlon64",
2257   "Opteron QC/Phenom",  // Barcelona et.al.
2258   "",
2259   "",
2260   "",
2261   "",
2262   "",
2263   "",
2264   "Zen"
2265 };
2266 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2267 // September 2013, Vol 3C Table 35-1
2268 const char* const _model_id_pentium_pro[] = {
2269   "",
2270   "Pentium Pro",
2271   "",
2272   "Pentium II model 3",
2273   "",
2274   "Pentium II model 5/Xeon/Celeron",
2275   "Celeron",
2276   "Pentium III/Pentium III Xeon",
2277   "Pentium III/Pentium III Xeon",
2278   "Pentium M model 9",    // Yonah
2279   "Pentium III, model A",
2280   "Pentium III, model B",
2281   "",
2282   "Pentium M model D",    // Dothan
2283   "",
2284   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2285   "",
2286   "",
2287   "",
2288   "",
2289   "",
2290   "",
2291   "Celeron",              // 0x16 Celeron 65nm
2292   "Core 2",               // 0x17 Penryn / Harpertown
2293   "",
2294   "",
2295   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2296   "Atom",                 // 0x1B Z5xx series Silverthorn
2297   "",
2298   "Core 2",               // 0x1D Dunnington (6-core)
2299   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2300   "",
2301   "",
2302   "",
2303   "",
2304   "",
2305   "",
2306   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2307   "",
2308   "",
2309   "",                     // 0x28
2310   "",
2311   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2312   "",
2313   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2314   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2315   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2316   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2317   "",
2318   "",
2319   "",
2320   "",
2321   "",
2322   "",
2323   "",
2324   "",
2325   "",
2326   "",
2327   "Ivy Bridge",           // 0x3a
2328   "",
2329   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2330   "",                     // 0x3d "Next Generation Intel Core Processor"
2331   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2332   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2333   "",
2334   "",
2335   "",
2336   "",
2337   "",
2338   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2339   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2340   nullptr
2341 };
2342 
2343 /* Brand ID is for back compatibility
2344  * Newer CPUs uses the extended brand string */
2345 const char* const _brand_id[] = {
2346   "",
2347   "Celeron processor",
2348   "Pentium III processor",
2349   "Intel Pentium III Xeon processor",
2350   "",
2351   "",
2352   "",
2353   "",
2354   "Intel Pentium 4 processor",
2355   nullptr
2356 };
2357 
2358 
2359 const char* const _feature_edx_id[] = {
2360   "On-Chip FPU",
2361   "Virtual Mode Extensions",
2362   "Debugging Extensions",
2363   "Page Size Extensions",
2364   "Time Stamp Counter",
2365   "Model Specific Registers",
2366   "Physical Address Extension",
2367   "Machine Check Exceptions",
2368   "CMPXCHG8B Instruction",
2369   "On-Chip APIC",
2370   "",
2371   "Fast System Call",
2372   "Memory Type Range Registers",
2373   "Page Global Enable",
2374   "Machine Check Architecture",
2375   "Conditional Mov Instruction",
2376   "Page Attribute Table",
2377   "36-bit Page Size Extension",
2378   "Processor Serial Number",
2379   "CLFLUSH Instruction",
2380   "",
2381   "Debug Trace Store feature",
2382   "ACPI registers in MSR space",
2383   "Intel Architecture MMX Technology",
2384   "Fast Float Point Save and Restore",
2385   "Streaming SIMD extensions",
2386   "Streaming SIMD extensions 2",
2387   "Self-Snoop",
2388   "Hyper Threading",
2389   "Thermal Monitor",
2390   "",
2391   "Pending Break Enable"
2392 };
2393 
2394 const char* const _feature_extended_edx_id[] = {
2395   "",
2396   "",
2397   "",
2398   "",
2399   "",
2400   "",
2401   "",
2402   "",
2403   "",
2404   "",
2405   "",
2406   "SYSCALL/SYSRET",
2407   "",
2408   "",
2409   "",
2410   "",
2411   "",
2412   "",
2413   "",
2414   "",
2415   "Execute Disable Bit",
2416   "",
2417   "",
2418   "",
2419   "",
2420   "",
2421   "",
2422   "RDTSCP",
2423   "",
2424   "Intel 64 Architecture",
2425   "",
2426   ""
2427 };
2428 
2429 const char* const _feature_ecx_id[] = {
2430   "Streaming SIMD Extensions 3",
2431   "PCLMULQDQ",
2432   "64-bit DS Area",
2433   "MONITOR/MWAIT instructions",
2434   "CPL Qualified Debug Store",
2435   "Virtual Machine Extensions",
2436   "Safer Mode Extensions",
2437   "Enhanced Intel SpeedStep technology",
2438   "Thermal Monitor 2",
2439   "Supplemental Streaming SIMD Extensions 3",
2440   "L1 Context ID",
2441   "",
2442   "Fused Multiply-Add",
2443   "CMPXCHG16B",
2444   "xTPR Update Control",
2445   "Perfmon and Debug Capability",
2446   "",
2447   "Process-context identifiers",
2448   "Direct Cache Access",
2449   "Streaming SIMD extensions 4.1",
2450   "Streaming SIMD extensions 4.2",
2451   "x2APIC",
2452   "MOVBE",
2453   "Popcount instruction",
2454   "TSC-Deadline",
2455   "AESNI",
2456   "XSAVE",
2457   "OSXSAVE",
2458   "AVX",
2459   "F16C",
2460   "RDRAND",
2461   ""
2462 };
2463 
2464 const char* const _feature_extended_ecx_id[] = {
2465   "LAHF/SAHF instruction support",
2466   "Core multi-processor legacy mode",
2467   "",
2468   "",
2469   "",
2470   "Advanced Bit Manipulations: LZCNT",
2471   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2472   "Misaligned SSE mode",
2473   "",
2474   "",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   "",
2484   "",
2485   "",
2486   "",
2487   "",
2488   "",
2489   "",
2490   "",
2491   "",
2492   "",
2493   "",
2494   "",
2495   "",
2496   ""
2497 };
2498 
2499 void VM_Version::initialize_tsc(void) {
2500   ResourceMark rm;
2501 
2502   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2503   if (cpuid_brand_string_stub_blob == nullptr) {
2504     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2505   }
2506   CodeBuffer c(cpuid_brand_string_stub_blob);
2507   VM_Version_StubGenerator g(&c);
2508   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2509                                    g.generate_getCPUIDBrandString());
2510 }
2511 
2512 const char* VM_Version::cpu_model_description(void) {
2513   uint32_t cpu_family = extended_cpu_family();
2514   uint32_t cpu_model = extended_cpu_model();
2515   const char* model = nullptr;
2516 
2517   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2518     for (uint32_t i = 0; i <= cpu_model; i++) {
2519       model = _model_id_pentium_pro[i];
2520       if (model == nullptr) {
2521         break;
2522       }
2523     }
2524   }
2525   return model;
2526 }
2527 
2528 const char* VM_Version::cpu_brand_string(void) {
2529   if (_cpu_brand_string == nullptr) {
2530     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2531     if (nullptr == _cpu_brand_string) {
2532       return nullptr;
2533     }
2534     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2535     if (ret_val != OS_OK) {
2536       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2537       _cpu_brand_string = nullptr;
2538     }
2539   }
2540   return _cpu_brand_string;
2541 }
2542 
2543 const char* VM_Version::cpu_brand(void) {
2544   const char*  brand  = nullptr;
2545 
2546   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2547     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2548     brand = _brand_id[0];
2549     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2550       brand = _brand_id[i];
2551     }
2552   }
2553   return brand;
2554 }
2555 
2556 bool VM_Version::cpu_is_em64t(void) {
2557   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2558 }
2559 
2560 bool VM_Version::is_netburst(void) {
2561   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2562 }
2563 
2564 bool VM_Version::supports_tscinv_ext(void) {
2565   if (!supports_tscinv_bit()) {
2566     return false;
2567   }
2568 
2569   if (is_intel()) {
2570     return true;
2571   }
2572 
2573   if (is_amd()) {
2574     return !is_amd_Barcelona();
2575   }
2576 
2577   if (is_hygon()) {
2578     return true;
2579   }
2580 
2581   return false;
2582 }
2583 
2584 void VM_Version::resolve_cpu_information_details(void) {
2585 
2586   // in future we want to base this information on proper cpu
2587   // and cache topology enumeration such as:
2588   // Intel 64 Architecture Processor Topology Enumeration
2589   // which supports system cpu and cache topology enumeration
2590   // either using 2xAPICIDs or initial APICIDs
2591 
2592   // currently only rough cpu information estimates
2593   // which will not necessarily reflect the exact configuration of the system
2594 
2595   // this is the number of logical hardware threads
2596   // visible to the operating system
2597   _no_of_threads = os::processor_count();
2598 
2599   // find out number of threads per cpu package
2600   int threads_per_package = threads_per_core() * cores_per_cpu();
2601 
2602   // use amount of threads visible to the process in order to guess number of sockets
2603   _no_of_sockets = _no_of_threads / threads_per_package;
2604 
2605   // process might only see a subset of the total number of threads
2606   // from a single processor package. Virtualization/resource management for example.
2607   // If so then just write a hard 1 as num of pkgs.
2608   if (0 == _no_of_sockets) {
2609     _no_of_sockets = 1;
2610   }
2611 
2612   // estimate the number of cores
2613   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2614 }
2615 
2616 
2617 const char* VM_Version::cpu_family_description(void) {
2618   int cpu_family_id = extended_cpu_family();
2619   if (is_amd()) {
2620     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2621       return _family_id_amd[cpu_family_id];
2622     }
2623   }
2624   if (is_intel()) {
2625     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2626       return cpu_model_description();
2627     }
2628     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2629       return _family_id_intel[cpu_family_id];
2630     }
2631   }
2632   if (is_hygon()) {
2633     return "Dhyana";
2634   }
2635   return "Unknown x86";
2636 }
2637 
2638 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2639   assert(buf != nullptr, "buffer is null!");
2640   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2641 
2642   const char* cpu_type = nullptr;
2643   const char* x64 = nullptr;
2644 
2645   if (is_intel()) {
2646     cpu_type = "Intel";
2647     x64 = cpu_is_em64t() ? " Intel64" : "";
2648   } else if (is_amd()) {
2649     cpu_type = "AMD";
2650     x64 = cpu_is_em64t() ? " AMD64" : "";
2651   } else if (is_hygon()) {
2652     cpu_type = "Hygon";
2653     x64 = cpu_is_em64t() ? " AMD64" : "";
2654   } else {
2655     cpu_type = "Unknown x86";
2656     x64 = cpu_is_em64t() ? " x86_64" : "";
2657   }
2658 
2659   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2660     cpu_type,
2661     cpu_family_description(),
2662     supports_ht() ? " (HT)" : "",
2663     supports_sse3() ? " SSE3" : "",
2664     supports_ssse3() ? " SSSE3" : "",
2665     supports_sse4_1() ? " SSE4.1" : "",
2666     supports_sse4_2() ? " SSE4.2" : "",
2667     supports_sse4a() ? " SSE4A" : "",
2668     is_netburst() ? " Netburst" : "",
2669     is_intel_family_core() ? " Core" : "",
2670     x64);
2671 
2672   return OS_OK;
2673 }
2674 
2675 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2676   assert(buf != nullptr, "buffer is null!");
2677   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2678   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2679 
2680   // invoke newly generated asm code to fetch CPU Brand String
2681   getCPUIDBrandString_stub(&_cpuid_info);
2682 
2683   // fetch results into buffer
2684   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2685   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2686   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2687   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2688   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2689   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2690   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2691   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2692   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2693   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2694   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2695   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2696 
2697   return OS_OK;
2698 }
2699 
2700 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2701   guarantee(buf != nullptr, "buffer is null!");
2702   guarantee(buf_len > 0, "buffer len not enough!");
2703 
2704   unsigned int flag = 0;
2705   unsigned int fi = 0;
2706   size_t       written = 0;
2707   const char*  prefix = "";
2708 
2709 #define WRITE_TO_BUF(string)                                                          \
2710   {                                                                                   \
2711     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2712     if (res < 0) {                                                                    \
2713       return buf_len - 1;                                                             \
2714     }                                                                                 \
2715     written += res;                                                                   \
2716     if (prefix[0] == '\0') {                                                          \
2717       prefix = ", ";                                                                  \
2718     }                                                                                 \
2719   }
2720 
2721   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2722     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2723       continue; /* no hyperthreading */
2724     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2725       continue; /* no fast system call */
2726     }
2727     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2728       WRITE_TO_BUF(_feature_edx_id[fi]);
2729     }
2730   }
2731 
2732   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2733     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2734       WRITE_TO_BUF(_feature_ecx_id[fi]);
2735     }
2736   }
2737 
2738   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2739     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2740       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2741     }
2742   }
2743 
2744   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2745     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2746       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2747     }
2748   }
2749 
2750   if (supports_tscinv_bit()) {
2751       WRITE_TO_BUF("Invariant TSC");
2752   }
2753 
2754   return written;
2755 }
2756 
2757 /**
2758  * Write a detailed description of the cpu to a given buffer, including
2759  * feature set.
2760  */
2761 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2762   assert(buf != nullptr, "buffer is null!");
2763   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2764 
2765   static const char* unknown = "<unknown>";
2766   char               vendor_id[VENDOR_LENGTH];
2767   const char*        family = nullptr;
2768   const char*        model = nullptr;
2769   const char*        brand = nullptr;
2770   int                outputLen = 0;
2771 
2772   family = cpu_family_description();
2773   if (family == nullptr) {
2774     family = unknown;
2775   }
2776 
2777   model = cpu_model_description();
2778   if (model == nullptr) {
2779     model = unknown;
2780   }
2781 
2782   brand = cpu_brand_string();
2783 
2784   if (brand == nullptr) {
2785     brand = cpu_brand();
2786     if (brand == nullptr) {
2787       brand = unknown;
2788     }
2789   }
2790 
2791   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2792   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2793   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2794   vendor_id[VENDOR_LENGTH-1] = '\0';
2795 
2796   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2797     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2798     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2799     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2800     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2801     "Supports: ",
2802     brand,
2803     vendor_id,
2804     family,
2805     extended_cpu_family(),
2806     model,
2807     extended_cpu_model(),
2808     cpu_stepping(),
2809     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2810     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2811     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2812     _cpuid_info.std_cpuid1_eax.value,
2813     _cpuid_info.std_cpuid1_ebx.value,
2814     _cpuid_info.std_cpuid1_ecx.value,
2815     _cpuid_info.std_cpuid1_edx.value,
2816     _cpuid_info.ext_cpuid1_eax,
2817     _cpuid_info.ext_cpuid1_ebx,
2818     _cpuid_info.ext_cpuid1_ecx,
2819     _cpuid_info.ext_cpuid1_edx);
2820 
2821   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2822     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2823     return OS_ERR;
2824   }
2825 
2826   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2827 
2828   return OS_OK;
2829 }
2830 
2831 
2832 // Fill in Abstract_VM_Version statics
2833 void VM_Version::initialize_cpu_information() {
2834   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2835   assert(!_initialized, "shouldn't be initialized yet");
2836   resolve_cpu_information_details();
2837 
2838   // initialize cpu_name and cpu_desc
2839   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2840   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2841   _initialized = true;
2842 }
2843 
2844 /**
2845  *  For information about extracting the frequency from the cpu brand string, please see:
2846  *
2847  *    Intel Processor Identification and the CPUID Instruction
2848  *    Application Note 485
2849  *    May 2012
2850  *
2851  * The return value is the frequency in Hz.
2852  */
2853 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2854   const char* const brand_string = cpu_brand_string();
2855   if (brand_string == nullptr) {
2856     return 0;
2857   }
2858   const int64_t MEGA = 1000000;
2859   int64_t multiplier = 0;
2860   int64_t frequency = 0;
2861   uint8_t idx = 0;
2862   // The brand string buffer is at most 48 bytes.
2863   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2864   for (; idx < 48-2; ++idx) {
2865     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2866     // Search brand string for "yHz" where y is M, G, or T.
2867     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2868       if (brand_string[idx] == 'M') {
2869         multiplier = MEGA;
2870       } else if (brand_string[idx] == 'G') {
2871         multiplier = MEGA * 1000;
2872       } else if (brand_string[idx] == 'T') {
2873         multiplier = MEGA * MEGA;
2874       }
2875       break;
2876     }
2877   }
2878   if (multiplier > 0) {
2879     // Compute frequency (in Hz) from brand string.
2880     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2881       frequency =  (brand_string[idx-4] - '0') * multiplier;
2882       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2883       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2884     } else { // format is "xxxx"
2885       frequency =  (brand_string[idx-4] - '0') * 1000;
2886       frequency += (brand_string[idx-3] - '0') * 100;
2887       frequency += (brand_string[idx-2] - '0') * 10;
2888       frequency += (brand_string[idx-1] - '0');
2889       frequency *= multiplier;
2890     }
2891   }
2892   return frequency;
2893 }
2894 
2895 
2896 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2897   if (_max_qualified_cpu_frequency == 0) {
2898     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2899   }
2900   return _max_qualified_cpu_frequency;
2901 }
2902 
2903 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2904   VM_Features vm_features;
2905   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2906     vm_features.set_feature(CPU_CX8);
2907   if (std_cpuid1_edx.bits.cmov != 0)
2908     vm_features.set_feature(CPU_CMOV);
2909   if (std_cpuid1_edx.bits.clflush != 0)
2910     vm_features.set_feature(CPU_FLUSH);
2911   // clflush should always be available on x86_64
2912   // if not we are in real trouble because we rely on it
2913   // to flush the code cache.
2914   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2915   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2916       ext_cpuid1_edx.bits.fxsr != 0))
2917     vm_features.set_feature(CPU_FXSR);
2918   // HT flag is set for multi-core processors also.
2919   if (threads_per_core() > 1)
2920     vm_features.set_feature(CPU_HT);
2921   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2922       ext_cpuid1_edx.bits.mmx != 0))
2923     vm_features.set_feature(CPU_MMX);
2924   if (std_cpuid1_edx.bits.sse != 0)
2925     vm_features.set_feature(CPU_SSE);
2926   if (std_cpuid1_edx.bits.sse2 != 0)
2927     vm_features.set_feature(CPU_SSE2);
2928   if (std_cpuid1_ecx.bits.sse3 != 0)
2929     vm_features.set_feature(CPU_SSE3);
2930   if (std_cpuid1_ecx.bits.ssse3 != 0)
2931     vm_features.set_feature(CPU_SSSE3);
2932   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2933     vm_features.set_feature(CPU_SSE4_1);
2934   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2935     vm_features.set_feature(CPU_SSE4_2);
2936   if (std_cpuid1_ecx.bits.popcnt != 0)
2937     vm_features.set_feature(CPU_POPCNT);
2938   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2939       xem_xcr0_eax.bits.apx_f != 0) {
2940     vm_features.set_feature(CPU_APX_F);
2941   }
2942   if (std_cpuid1_ecx.bits.avx != 0 &&
2943       std_cpuid1_ecx.bits.osxsave != 0 &&
2944       xem_xcr0_eax.bits.sse != 0 &&
2945       xem_xcr0_eax.bits.ymm != 0) {
2946     vm_features.set_feature(CPU_AVX);
2947     vm_features.set_feature(CPU_VZEROUPPER);
2948     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2949       vm_features.set_feature(CPU_SHA512);
2950     if (std_cpuid1_ecx.bits.f16c != 0)
2951       vm_features.set_feature(CPU_F16C);
2952     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2953       vm_features.set_feature(CPU_AVX2);
2954       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2955         vm_features.set_feature(CPU_AVX_IFMA);
2956     }
2957     if (sef_cpuid7_ecx.bits.gfni != 0)
2958         vm_features.set_feature(CPU_GFNI);
2959     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2960         xem_xcr0_eax.bits.opmask != 0 &&
2961         xem_xcr0_eax.bits.zmm512 != 0 &&
2962         xem_xcr0_eax.bits.zmm32 != 0) {
2963       vm_features.set_feature(CPU_AVX512F);
2964       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2965         vm_features.set_feature(CPU_AVX512CD);
2966       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2967         vm_features.set_feature(CPU_AVX512DQ);
2968       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2969         vm_features.set_feature(CPU_AVX512_IFMA);
2970       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2971         vm_features.set_feature(CPU_AVX512PF);
2972       if (sef_cpuid7_ebx.bits.avx512er != 0)
2973         vm_features.set_feature(CPU_AVX512ER);
2974       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2975         vm_features.set_feature(CPU_AVX512BW);
2976       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2977         vm_features.set_feature(CPU_AVX512VL);
2978       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2979         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2980       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2981         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2982       if (sef_cpuid7_ecx.bits.vaes != 0)
2983         vm_features.set_feature(CPU_AVX512_VAES);
2984       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2985         vm_features.set_feature(CPU_AVX512_VNNI);
2986       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2987         vm_features.set_feature(CPU_AVX512_BITALG);
2988       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2989         vm_features.set_feature(CPU_AVX512_VBMI);
2990       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2991         vm_features.set_feature(CPU_AVX512_VBMI2);
2992     }
2993     if (is_intel()) {
2994       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2995           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2996           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2997           xem_xcr0_eax.bits.opmask != 0 &&
2998           xem_xcr0_eax.bits.zmm512 != 0 &&
2999           xem_xcr0_eax.bits.zmm32 != 0) {
3000         vm_features.set_feature(CPU_AVX10_1);
3001         vm_features.set_feature(CPU_AVX512F);
3002         vm_features.set_feature(CPU_AVX512CD);
3003         vm_features.set_feature(CPU_AVX512DQ);
3004         vm_features.set_feature(CPU_AVX512PF);
3005         vm_features.set_feature(CPU_AVX512ER);
3006         vm_features.set_feature(CPU_AVX512BW);
3007         vm_features.set_feature(CPU_AVX512VL);
3008         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3009         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3010         vm_features.set_feature(CPU_AVX512_VAES);
3011         vm_features.set_feature(CPU_AVX512_VNNI);
3012         vm_features.set_feature(CPU_AVX512_BITALG);
3013         vm_features.set_feature(CPU_AVX512_VBMI);
3014         vm_features.set_feature(CPU_AVX512_VBMI2);
3015         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3016           vm_features.set_feature(CPU_AVX10_2);
3017         }
3018       }
3019     }
3020   }
3021 
3022   if (std_cpuid1_ecx.bits.hv != 0)
3023     vm_features.set_feature(CPU_HV);
3024   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3025     vm_features.set_feature(CPU_BMI1);
3026   if (std_cpuid1_edx.bits.tsc != 0)
3027     vm_features.set_feature(CPU_TSC);
3028   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3029     vm_features.set_feature(CPU_TSCINV_BIT);
3030   if (std_cpuid1_ecx.bits.aes != 0)
3031     vm_features.set_feature(CPU_AES);
3032   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3033     vm_features.set_feature(CPU_LZCNT);
3034   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3035     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3036   if (sef_cpuid7_ebx.bits.erms != 0)
3037     vm_features.set_feature(CPU_ERMS);
3038   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3039     vm_features.set_feature(CPU_FSRM);
3040   if (std_cpuid1_ecx.bits.clmul != 0)
3041     vm_features.set_feature(CPU_CLMUL);
3042   if (sef_cpuid7_ebx.bits.rtm != 0)
3043     vm_features.set_feature(CPU_RTM);
3044   if (sef_cpuid7_ebx.bits.adx != 0)
3045      vm_features.set_feature(CPU_ADX);
3046   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3047     vm_features.set_feature(CPU_BMI2);
3048   if (sef_cpuid7_ebx.bits.sha != 0)
3049     vm_features.set_feature(CPU_SHA);
3050   if (std_cpuid1_ecx.bits.fma != 0)
3051     vm_features.set_feature(CPU_FMA);
3052   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3053     vm_features.set_feature(CPU_FLUSHOPT);
3054   if (sef_cpuid7_ebx.bits.clwb != 0)
3055     vm_features.set_feature(CPU_CLWB);
3056   if (ext_cpuid1_edx.bits.rdtscp != 0)
3057     vm_features.set_feature(CPU_RDTSCP);
3058   if (sef_cpuid7_ecx.bits.rdpid != 0)
3059     vm_features.set_feature(CPU_RDPID);
3060 
3061   // AMD|Hygon additional features.
3062   if (is_amd_family()) {
3063     // PREFETCHW was checked above, check TDNOW here.
3064     if ((ext_cpuid1_edx.bits.tdnow != 0))
3065       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3066     if (ext_cpuid1_ecx.bits.sse4a != 0)
3067       vm_features.set_feature(CPU_SSE4A);
3068   }
3069 
3070   // Intel additional features.
3071   if (is_intel()) {
3072     if (sef_cpuid7_edx.bits.serialize != 0)
3073       vm_features.set_feature(CPU_SERIALIZE);
3074     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3075       vm_features.set_feature(CPU_AVX512_FP16);
3076   }
3077 
3078   // ZX additional features.
3079   if (is_zx()) {
3080     // We do not know if these are supported by ZX, so we cannot trust
3081     // common CPUID bit for them.
3082     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3083     vm_features.clear_feature(CPU_CLWB);
3084   }
3085 
3086   // Protection key features.
3087   if (sef_cpuid7_ecx.bits.pku != 0) {
3088     vm_features.set_feature(CPU_PKU);
3089   }
3090   if (sef_cpuid7_ecx.bits.ospke != 0) {
3091     vm_features.set_feature(CPU_OSPKE);
3092   }
3093 
3094   // Control flow enforcement (CET) features.
3095   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3096     vm_features.set_feature(CPU_CET_SS);
3097   }
3098   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3099     vm_features.set_feature(CPU_CET_IBT);
3100   }
3101 
3102   // Composite features.
3103   if (supports_tscinv_bit() &&
3104       ((is_amd_family() && !is_amd_Barcelona()) ||
3105        is_intel_tsc_synched_at_init())) {
3106     vm_features.set_feature(CPU_TSCINV);
3107   }
3108   return vm_features;
3109 }
3110 
3111 bool VM_Version::os_supports_avx_vectors() {
3112   bool retVal = false;
3113   int nreg = 4;
3114   if (supports_evex()) {
3115     // Verify that OS save/restore all bits of EVEX registers
3116     // during signal processing.
3117     retVal = true;
3118     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3119       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3120         retVal = false;
3121         break;
3122       }
3123     }
3124   } else if (supports_avx()) {
3125     // Verify that OS save/restore all bits of AVX registers
3126     // during signal processing.
3127     retVal = true;
3128     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3129       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3130         retVal = false;
3131         break;
3132       }
3133     }
3134     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3135     if (retVal == false) {
3136       // Verify that OS save/restore all bits of EVEX registers
3137       // during signal processing.
3138       retVal = true;
3139       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3140         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3141           retVal = false;
3142           break;
3143         }
3144       }
3145     }
3146   }
3147   return retVal;
3148 }
3149 
3150 bool VM_Version::os_supports_apx_egprs() {
3151   if (!supports_apx_f()) {
3152     return false;
3153   }
3154   // Enable APX support for product builds after
3155   // completion of planned features listed in JDK-8329030.
3156 #if !defined(PRODUCT)
3157   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3158       _cpuid_info.apx_save[1] != egpr_test_value()) {
3159     return false;
3160   }
3161   return true;
3162 #else
3163   return false;
3164 #endif
3165 }
3166 
3167 uint VM_Version::cores_per_cpu() {
3168   uint result = 1;
3169   if (is_intel()) {
3170     bool supports_topology = supports_processor_topology();
3171     if (supports_topology) {
3172       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3173                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3174     }
3175     if (!supports_topology || result == 0) {
3176       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3177     }
3178   } else if (is_amd_family()) {
3179     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3180   } else if (is_zx()) {
3181     bool supports_topology = supports_processor_topology();
3182     if (supports_topology) {
3183       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3184                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3185     }
3186     if (!supports_topology || result == 0) {
3187       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3188     }
3189   }
3190   return result;
3191 }
3192 
3193 uint VM_Version::threads_per_core() {
3194   uint result = 1;
3195   if (is_intel() && supports_processor_topology()) {
3196     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3197   } else if (is_zx() && supports_processor_topology()) {
3198     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3199   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3200     if (cpu_family() >= 0x17) {
3201       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3202     } else {
3203       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3204                  cores_per_cpu();
3205     }
3206   }
3207   return (result == 0 ? 1 : result);
3208 }
3209 
3210 uint VM_Version::L1_line_size() {
3211   uint result = 0;
3212   if (is_intel()) {
3213     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3214   } else if (is_amd_family()) {
3215     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3216   } else if (is_zx()) {
3217     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3218   }
3219   if (result < 32) // not defined ?
3220     result = 32;   // 32 bytes by default on x86 and other x64
3221   return result;
3222 }
3223 
3224 bool VM_Version::is_intel_tsc_synched_at_init() {
3225   if (is_intel_family_core()) {
3226     uint32_t ext_model = extended_cpu_model();
3227     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3228         ext_model == CPU_MODEL_WESTMERE_EP    ||
3229         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3230         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3231       // <= 2-socket invariant tsc support. EX versions are usually used
3232       // in > 2-socket systems and likely don't synchronize tscs at
3233       // initialization.
3234       // Code that uses tsc values must be prepared for them to arbitrarily
3235       // jump forward or backward.
3236       return true;
3237     }
3238   }
3239   return false;
3240 }
3241 
3242 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3243   // Hardware prefetching (distance/size in bytes):
3244   // Pentium 3 -  64 /  32
3245   // Pentium 4 - 256 / 128
3246   // Athlon    -  64 /  32 ????
3247   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3248   // Core      - 128 /  64
3249   //
3250   // Software prefetching (distance in bytes / instruction with best score):
3251   // Pentium 3 - 128 / prefetchnta
3252   // Pentium 4 - 512 / prefetchnta
3253   // Athlon    - 128 / prefetchnta
3254   // Opteron   - 256 / prefetchnta
3255   // Core      - 256 / prefetchnta
3256   // It will be used only when AllocatePrefetchStyle > 0
3257 
3258   if (is_amd_family()) { // AMD | Hygon
3259     if (supports_sse2()) {
3260       return 256; // Opteron
3261     } else {
3262       return 128; // Athlon
3263     }
3264   } else { // Intel
3265     if (supports_sse3() && cpu_family() == 6) {
3266       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3267         return 192;
3268       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3269         return 384;
3270       }
3271     }
3272     if (supports_sse2()) {
3273       if (cpu_family() == 6) {
3274         return 256; // Pentium M, Core, Core2
3275       } else {
3276         return 512; // Pentium 4
3277       }
3278     } else {
3279       return 128; // Pentium 3 (and all other old CPUs)
3280     }
3281   }
3282 }
3283 
3284 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3285   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3286   switch (id) {
3287   case vmIntrinsics::_floatToFloat16:
3288   case vmIntrinsics::_float16ToFloat:
3289     if (!supports_float16()) {
3290       return false;
3291     }
3292     break;
3293   default:
3294     break;
3295   }
3296   return true;
3297 }
3298 
3299 void VM_Version::insert_features_names(VM_Version::VM_Features features, char* buf, size_t buflen) {
3300   for (int i = 0; i < MAX_CPU_FEATURES; i++) {
3301     if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3302       int res = jio_snprintf(buf, buflen, ", %s", _features_names[i]);
3303       assert(res > 0, "not enough temporary space allocated");
3304       buf += res;
3305       buflen -= res;
3306     }
3307   }
3308 }