1 /*
   2  * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/icache.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/checkedCast.hpp"
  42 #include "utilities/ostream.hpp"
  43 #include "utilities/powerOfTwo.hpp"
  44 #include "utilities/virtualizationSupport.hpp"
  45 
  46 int VM_Version::_cpu;
  47 int VM_Version::_model;
  48 int VM_Version::_stepping;
  49 bool VM_Version::_has_intel_jcc_erratum;
  50 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  51 
  52 #define DECLARE_CPU_FEATURE_NAME(id, name) XSTR(name),
  53 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  54 #undef DECLARE_CPU_FEATURE_NAME
  55 
  56 // Address of instruction which causes SEGV
  57 address VM_Version::_cpuinfo_segv_addr = nullptr;
  58 // Address of instruction after the one which causes SEGV
  59 address VM_Version::_cpuinfo_cont_addr = nullptr;
  60 // Address of instruction which causes APX specific SEGV
  61 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
  62 // Address of instruction after the one which causes APX specific SEGV
  63 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
  64 
  65 static BufferBlob* stub_blob;
  66 static const int stub_size = 2550;
  67 
  68 VM_Version::VM_Features VM_Version::_features;
  69 VM_Version::VM_Features VM_Version::_cpu_features;
  70 
  71 extern "C" {
  72   typedef void (*get_cpu_info_stub_t)(void*);
  73   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  74   typedef void (*clear_apx_test_state_t)(void);
  75   typedef void (*getCPUIDBrandString_stub_t)(void*);
  76 }
  77 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  78 static detect_virt_stub_t detect_virt_stub = nullptr;
  79 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  80 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
  81 
  82 #define CPUID_STANDARD_FN   0x0
  83 #define CPUID_STANDARD_FN_1 0x1
  84 #define CPUID_STANDARD_FN_4 0x4
  85 #define CPUID_STANDARD_FN_B 0xb
  86 
  87 #define CPUID_EXTENDED_FN   0x80000000
  88 #define CPUID_EXTENDED_FN_1 0x80000001
  89 #define CPUID_EXTENDED_FN_2 0x80000002
  90 #define CPUID_EXTENDED_FN_3 0x80000003
  91 #define CPUID_EXTENDED_FN_4 0x80000004
  92 #define CPUID_EXTENDED_FN_7 0x80000007
  93 #define CPUID_EXTENDED_FN_8 0x80000008
  94 
  95 class VM_Version_StubGenerator: public StubCodeGenerator {
  96  public:
  97 
  98   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
  99 
 100   address clear_apx_test_state() {
 101 #   define __ _masm->
 102     address start = __ pc();
 103     // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
 104     // handling guarantees that preserved register values post signal handling were
 105     // re-instantiated by operating system and not because they were not modified externally.
 106 
 107     bool save_apx = UseAPX;
 108     VM_Version::set_apx_cpuFeatures();
 109     UseAPX = true;
 110     // EGPR state save/restoration.
 111     __ mov64(r16, 0L);
 112     __ mov64(r31, 0L);
 113     UseAPX = save_apx;
 114     VM_Version::clean_cpuFeatures();
 115     __ ret(0);
 116     return start;
 117   }
 118 
 119   address generate_get_cpu_info() {
 120     // Flags to test CPU type.
 121     const uint32_t HS_EFL_AC = 0x40000;
 122     const uint32_t HS_EFL_ID = 0x200000;
 123     // Values for when we don't have a CPUID instruction.
 124     const int      CPU_FAMILY_SHIFT = 8;
 125     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 126     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 127     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 128 
 129     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
 130     Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
 131     Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
 132     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 133 
 134     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 135 #   define __ _masm->
 136 
 137     address start = __ pc();
 138 
 139     //
 140     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 141     //
 142     // rcx and rdx are first and second argument registers on windows
 143 
 144     __ push(rbp);
 145     __ mov(rbp, c_rarg0); // cpuid_info address
 146     __ push(rbx);
 147     __ push(rsi);
 148     __ pushf();          // preserve rbx, and flags
 149     __ pop(rax);
 150     __ push(rax);
 151     __ mov(rcx, rax);
 152     //
 153     // if we are unable to change the AC flag, we have a 386
 154     //
 155     __ xorl(rax, HS_EFL_AC);
 156     __ push(rax);
 157     __ popf();
 158     __ pushf();
 159     __ pop(rax);
 160     __ cmpptr(rax, rcx);
 161     __ jccb(Assembler::notEqual, detect_486);
 162 
 163     __ movl(rax, CPU_FAMILY_386);
 164     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 165     __ jmp(done);
 166 
 167     //
 168     // If we are unable to change the ID flag, we have a 486 which does
 169     // not support the "cpuid" instruction.
 170     //
 171     __ bind(detect_486);
 172     __ mov(rax, rcx);
 173     __ xorl(rax, HS_EFL_ID);
 174     __ push(rax);
 175     __ popf();
 176     __ pushf();
 177     __ pop(rax);
 178     __ cmpptr(rcx, rax);
 179     __ jccb(Assembler::notEqual, detect_586);
 180 
 181     __ bind(cpu486);
 182     __ movl(rax, CPU_FAMILY_486);
 183     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 184     __ jmp(done);
 185 
 186     //
 187     // At this point, we have a chip which supports the "cpuid" instruction
 188     //
 189     __ bind(detect_586);
 190     __ xorl(rax, rax);
 191     __ cpuid();
 192     __ orl(rax, rax);
 193     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 194                                         // value of at least 1, we give up and
 195                                         // assume a 486
 196     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 197     __ movl(Address(rsi, 0), rax);
 198     __ movl(Address(rsi, 4), rbx);
 199     __ movl(Address(rsi, 8), rcx);
 200     __ movl(Address(rsi,12), rdx);
 201 
 202     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 203     __ jccb(Assembler::belowEqual, std_cpuid4);
 204 
 205     //
 206     // cpuid(0xB) Processor Topology
 207     //
 208     __ movl(rax, 0xb);
 209     __ xorl(rcx, rcx);   // Threads level
 210     __ cpuid();
 211 
 212     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 213     __ movl(Address(rsi, 0), rax);
 214     __ movl(Address(rsi, 4), rbx);
 215     __ movl(Address(rsi, 8), rcx);
 216     __ movl(Address(rsi,12), rdx);
 217 
 218     __ movl(rax, 0xb);
 219     __ movl(rcx, 1);     // Cores level
 220     __ cpuid();
 221     __ push(rax);
 222     __ andl(rax, 0x1f);  // Determine if valid topology level
 223     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 224     __ andl(rax, 0xffff);
 225     __ pop(rax);
 226     __ jccb(Assembler::equal, std_cpuid4);
 227 
 228     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 229     __ movl(Address(rsi, 0), rax);
 230     __ movl(Address(rsi, 4), rbx);
 231     __ movl(Address(rsi, 8), rcx);
 232     __ movl(Address(rsi,12), rdx);
 233 
 234     __ movl(rax, 0xb);
 235     __ movl(rcx, 2);     // Packages level
 236     __ cpuid();
 237     __ push(rax);
 238     __ andl(rax, 0x1f);  // Determine if valid topology level
 239     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 240     __ andl(rax, 0xffff);
 241     __ pop(rax);
 242     __ jccb(Assembler::equal, std_cpuid4);
 243 
 244     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 245     __ movl(Address(rsi, 0), rax);
 246     __ movl(Address(rsi, 4), rbx);
 247     __ movl(Address(rsi, 8), rcx);
 248     __ movl(Address(rsi,12), rdx);
 249 
 250     //
 251     // cpuid(0x4) Deterministic cache params
 252     //
 253     __ bind(std_cpuid4);
 254     __ movl(rax, 4);
 255     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 256     __ jccb(Assembler::greater, std_cpuid1);
 257 
 258     __ xorl(rcx, rcx);   // L1 cache
 259     __ cpuid();
 260     __ push(rax);
 261     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 262     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 263     __ pop(rax);
 264     __ jccb(Assembler::equal, std_cpuid1);
 265 
 266     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 267     __ movl(Address(rsi, 0), rax);
 268     __ movl(Address(rsi, 4), rbx);
 269     __ movl(Address(rsi, 8), rcx);
 270     __ movl(Address(rsi,12), rdx);
 271 
 272     //
 273     // Standard cpuid(0x1)
 274     //
 275     __ bind(std_cpuid1);
 276     __ movl(rax, 1);
 277     __ cpuid();
 278     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 279     __ movl(Address(rsi, 0), rax);
 280     __ movl(Address(rsi, 4), rbx);
 281     __ movl(Address(rsi, 8), rcx);
 282     __ movl(Address(rsi,12), rdx);
 283 
 284     //
 285     // Check if OS has enabled XGETBV instruction to access XCR0
 286     // (OSXSAVE feature flag) and CPU supports AVX
 287     //
 288     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 289     __ cmpl(rcx, 0x18000000);
 290     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 291 
 292     //
 293     // XCR0, XFEATURE_ENABLED_MASK register
 294     //
 295     __ xorl(rcx, rcx);   // zero for XCR0 register
 296     __ xgetbv();
 297     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 298     __ movl(Address(rsi, 0), rax);
 299     __ movl(Address(rsi, 4), rdx);
 300 
 301     //
 302     // cpuid(0x7) Structured Extended Features Enumeration Leaf.
 303     //
 304     __ bind(sef_cpuid);
 305     __ movl(rax, 7);
 306     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 307     __ jccb(Assembler::greater, ext_cpuid);
 308     // ECX = 0
 309     __ xorl(rcx, rcx);
 310     __ cpuid();
 311     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 312     __ movl(Address(rsi, 0), rax);
 313     __ movl(Address(rsi, 4), rbx);
 314     __ movl(Address(rsi, 8), rcx);
 315     __ movl(Address(rsi, 12), rdx);
 316 
 317     //
 318     // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
 319     //
 320     __ bind(sefsl1_cpuid);
 321     __ movl(rax, 7);
 322     __ movl(rcx, 1);
 323     __ cpuid();
 324     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 325     __ movl(Address(rsi, 0), rax);
 326     __ movl(Address(rsi, 4), rdx);
 327 
 328     //
 329     // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
 330     //
 331     __ bind(std_cpuid29);
 332     __ movl(rax, 0x29);
 333     __ movl(rcx, 0);
 334     __ cpuid();
 335     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
 336     __ movl(Address(rsi, 0), rbx);
 337 
 338     //
 339     // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
 340     //
 341     __ bind(std_cpuid24);
 342     __ movl(rax, 0x24);
 343     __ movl(rcx, 0);
 344     __ cpuid();
 345     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
 346     __ movl(Address(rsi, 0), rax);
 347     __ movl(Address(rsi, 4), rbx);
 348 
 349     //
 350     // Extended cpuid(0x80000000)
 351     //
 352     __ bind(ext_cpuid);
 353     __ movl(rax, 0x80000000);
 354     __ cpuid();
 355     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 356     __ jcc(Assembler::belowEqual, done);
 357     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 358     __ jcc(Assembler::belowEqual, ext_cpuid1);
 359     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 360     __ jccb(Assembler::belowEqual, ext_cpuid5);
 361     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 362     __ jccb(Assembler::belowEqual, ext_cpuid7);
 363     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 364     __ jccb(Assembler::belowEqual, ext_cpuid8);
 365     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 366     __ jccb(Assembler::below, ext_cpuid8);
 367     //
 368     // Extended cpuid(0x8000001E)
 369     //
 370     __ movl(rax, 0x8000001E);
 371     __ cpuid();
 372     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 373     __ movl(Address(rsi, 0), rax);
 374     __ movl(Address(rsi, 4), rbx);
 375     __ movl(Address(rsi, 8), rcx);
 376     __ movl(Address(rsi,12), rdx);
 377 
 378     //
 379     // Extended cpuid(0x80000008)
 380     //
 381     __ bind(ext_cpuid8);
 382     __ movl(rax, 0x80000008);
 383     __ cpuid();
 384     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 385     __ movl(Address(rsi, 0), rax);
 386     __ movl(Address(rsi, 4), rbx);
 387     __ movl(Address(rsi, 8), rcx);
 388     __ movl(Address(rsi,12), rdx);
 389 
 390     //
 391     // Extended cpuid(0x80000007)
 392     //
 393     __ bind(ext_cpuid7);
 394     __ movl(rax, 0x80000007);
 395     __ cpuid();
 396     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 397     __ movl(Address(rsi, 0), rax);
 398     __ movl(Address(rsi, 4), rbx);
 399     __ movl(Address(rsi, 8), rcx);
 400     __ movl(Address(rsi,12), rdx);
 401 
 402     //
 403     // Extended cpuid(0x80000005)
 404     //
 405     __ bind(ext_cpuid5);
 406     __ movl(rax, 0x80000005);
 407     __ cpuid();
 408     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 409     __ movl(Address(rsi, 0), rax);
 410     __ movl(Address(rsi, 4), rbx);
 411     __ movl(Address(rsi, 8), rcx);
 412     __ movl(Address(rsi,12), rdx);
 413 
 414     //
 415     // Extended cpuid(0x80000001)
 416     //
 417     __ bind(ext_cpuid1);
 418     __ movl(rax, 0x80000001);
 419     __ cpuid();
 420     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 421     __ movl(Address(rsi, 0), rax);
 422     __ movl(Address(rsi, 4), rbx);
 423     __ movl(Address(rsi, 8), rcx);
 424     __ movl(Address(rsi,12), rdx);
 425 
 426     //
 427     // Check if OS has enabled XGETBV instruction to access XCR0
 428     // (OSXSAVE feature flag) and CPU supports APX
 429     //
 430     // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
 431     // and XCRO[19] bit for OS support to save/restore extended GPR state.
 432     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 433     __ movl(rax, 0x200000);
 434     __ andl(rax, Address(rsi, 4));
 435     __ jcc(Assembler::equal, vector_save_restore);
 436     // check _cpuid_info.xem_xcr0_eax.bits.apx_f
 437     __ movl(rax, 0x80000);
 438     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
 439     __ jcc(Assembler::equal, vector_save_restore);
 440 
 441     bool save_apx = UseAPX;
 442     VM_Version::set_apx_cpuFeatures();
 443     UseAPX = true;
 444     __ mov64(r16, VM_Version::egpr_test_value());
 445     __ mov64(r31, VM_Version::egpr_test_value());
 446     __ xorl(rsi, rsi);
 447     VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
 448     // Generate SEGV
 449     __ movl(rax, Address(rsi, 0));
 450 
 451     VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
 452     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
 453     __ movq(Address(rsi, 0), r16);
 454     __ movq(Address(rsi, 8), r31);
 455 
 456     //
 457     // Query CPUID 0xD.19 for APX XSAVE offset
 458     // Extended State Enumeration Sub-leaf 19 (APX)
 459     // EAX = size of APX state (should be 128)
 460     // EBX = offset in standard XSAVE format
 461     //
 462     __ movl(rax, 0xD);
 463     __ movl(rcx, 19);
 464     __ cpuid();
 465     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
 466     __ movl(Address(rsi, 0), rax);
 467     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
 468     __ movl(Address(rsi, 0), rbx);
 469 
 470     UseAPX = save_apx;
 471     __ bind(vector_save_restore);
 472     //
 473     // Check if OS has enabled XGETBV instruction to access XCR0
 474     // (OSXSAVE feature flag) and CPU supports AVX
 475     //
 476     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 477     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 478     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 479     __ cmpl(rcx, 0x18000000);
 480     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 481 
 482     __ movl(rax, 0x6);
 483     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 484     __ cmpl(rax, 0x6);
 485     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 486 
 487     // we need to bridge farther than imm8, so we use this island as a thunk
 488     __ bind(done);
 489     __ jmp(wrapup);
 490 
 491     __ bind(start_simd_check);
 492     // Query CPUID 0xD sub-leaf 5, 6, and 7 offsets for AVX-512 XSAVE components
 493     __ movl(rax, 0xD);
 494     __ movl(rcx, 5);
 495     __ cpuid();
 496     __ lea(rsi, Address(rbp, in_bytes(VM_Version::opmask_xstate_offset_offset())));
 497     __ movl(Address(rsi, 0), rbx);
 498 
 499     __ movl(rax, 0xD);
 500     __ movl(rcx, 6);
 501     __ cpuid();
 502     __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm0to15_hi256_xstate_offset_offset())));
 503     __ movl(Address(rsi, 0), rbx);
 504 
 505     __ movl(rax, 0xD);
 506     __ movl(rcx, 7);
 507     __ cpuid();
 508     __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm16to31_xstate_offset_offset())));
 509     __ movl(Address(rsi, 0), rbx);
 510 
 511     //
 512     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 513     // registers are not restored after a signal processing.
 514     // Generate SEGV here (reference through null)
 515     // and check upper YMM/ZMM bits after it.
 516     //
 517     int saved_useavx = UseAVX;
 518 
 519     // If UseAVX is uninitialized or is set by the user to include EVEX
 520     if (use_evex) {
 521       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 522       // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
 523       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 524       __ movl(rax, 0x10000);
 525       __ andl(rax, Address(rsi, 4));
 526       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 527       __ movl(rbx, 0x80000);
 528       __ andl(rbx, Address(rsi, 4));
 529       __ orl(rax, rbx);
 530       __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
 531       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 532       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 533       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 534       __ movl(rax, 0xE0);
 535       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 536       __ cmpl(rax, 0xE0);
 537       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 538 
 539       if (FLAG_IS_DEFAULT(UseAVX)) {
 540         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 541         __ movl(rax, Address(rsi, 0));
 542         __ cmpl(rax, 0x50654);              // If it is Skylake
 543         __ jcc(Assembler::equal, legacy_setup);
 544       }
 545       // EVEX setup: run in lowest evex mode
 546       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 547       UseAVX = 3;
 548 #ifdef _WINDOWS
 549       // xmm5-xmm15 are not preserved by caller on windows
 550       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 551       __ subptr(rsp, 64);
 552       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 553       __ subptr(rsp, 64);
 554       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 555       __ subptr(rsp, 64);
 556       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 557 #endif // _WINDOWS
 558 
 559       // load value into all 64 bytes of zmm7 register
 560       __ movl(rcx, VM_Version::ymm_test_value());
 561       __ movdl(xmm0, rcx);
 562       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 563       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 564       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 565       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 566       VM_Version::clean_cpuFeatures();
 567       __ jmp(save_restore_except);
 568     }
 569 
 570     __ bind(legacy_setup);
 571     // AVX setup
 572     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 573     UseAVX = 1;
 574 #ifdef _WINDOWS
 575     __ subptr(rsp, 32);
 576     __ vmovdqu(Address(rsp, 0), xmm7);
 577     __ subptr(rsp, 32);
 578     __ vmovdqu(Address(rsp, 0), xmm8);
 579     __ subptr(rsp, 32);
 580     __ vmovdqu(Address(rsp, 0), xmm15);
 581 #endif // _WINDOWS
 582 
 583     // load value into all 32 bytes of ymm7 register
 584     __ movl(rcx, VM_Version::ymm_test_value());
 585 
 586     __ movdl(xmm0, rcx);
 587     __ pshufd(xmm0, xmm0, 0x00);
 588     __ vinsertf128_high(xmm0, xmm0);
 589     __ vmovdqu(xmm7, xmm0);
 590     __ vmovdqu(xmm8, xmm0);
 591     __ vmovdqu(xmm15, xmm0);
 592     VM_Version::clean_cpuFeatures();
 593 
 594     __ bind(save_restore_except);
 595     __ xorl(rsi, rsi);
 596     VM_Version::set_cpuinfo_segv_addr(__ pc());
 597     // Generate SEGV
 598     __ movl(rax, Address(rsi, 0));
 599 
 600     VM_Version::set_cpuinfo_cont_addr(__ pc());
 601     // Returns here after signal. Save xmm0 to check it later.
 602 
 603     // If UseAVX is uninitialized or is set by the user to include EVEX
 604     if (use_evex) {
 605       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 606       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 607       __ movl(rax, 0x10000);
 608       __ andl(rax, Address(rsi, 4));
 609       __ jcc(Assembler::equal, legacy_save_restore);
 610       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 611       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 612       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 613       __ movl(rax, 0xE0);
 614       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 615       __ cmpl(rax, 0xE0);
 616       __ jcc(Assembler::notEqual, legacy_save_restore);
 617 
 618       if (FLAG_IS_DEFAULT(UseAVX)) {
 619         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 620         __ movl(rax, Address(rsi, 0));
 621         __ cmpl(rax, 0x50654);              // If it is Skylake
 622         __ jcc(Assembler::equal, legacy_save_restore);
 623       }
 624       // EVEX check: run in lowest evex mode
 625       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 626       UseAVX = 3;
 627       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 628       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 629       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 630       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 631       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 632 
 633 #ifdef _WINDOWS
 634       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 635       __ addptr(rsp, 64);
 636       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 637       __ addptr(rsp, 64);
 638       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 639       __ addptr(rsp, 64);
 640 #endif // _WINDOWS
 641       generate_vzeroupper(wrapup);
 642       VM_Version::clean_cpuFeatures();
 643       UseAVX = saved_useavx;
 644       __ jmp(wrapup);
 645    }
 646 
 647     __ bind(legacy_save_restore);
 648     // AVX check
 649     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 650     UseAVX = 1;
 651     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 652     __ vmovdqu(Address(rsi, 0), xmm0);
 653     __ vmovdqu(Address(rsi, 32), xmm7);
 654     __ vmovdqu(Address(rsi, 64), xmm8);
 655     __ vmovdqu(Address(rsi, 96), xmm15);
 656 
 657 #ifdef _WINDOWS
 658     __ vmovdqu(xmm15, Address(rsp, 0));
 659     __ addptr(rsp, 32);
 660     __ vmovdqu(xmm8, Address(rsp, 0));
 661     __ addptr(rsp, 32);
 662     __ vmovdqu(xmm7, Address(rsp, 0));
 663     __ addptr(rsp, 32);
 664 #endif // _WINDOWS
 665 
 666     generate_vzeroupper(wrapup);
 667     VM_Version::clean_cpuFeatures();
 668     UseAVX = saved_useavx;
 669 
 670     __ bind(wrapup);
 671     __ popf();
 672     __ pop(rsi);
 673     __ pop(rbx);
 674     __ pop(rbp);
 675     __ ret(0);
 676 
 677 #   undef __
 678 
 679     return start;
 680   };
 681   void generate_vzeroupper(Label& L_wrapup) {
 682 #   define __ _masm->
 683     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 684     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 685     __ jcc(Assembler::notEqual, L_wrapup);
 686     __ movl(rcx, 0x0FFF0FF0);
 687     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 688     __ andl(rcx, Address(rsi, 0));
 689     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 690     __ jcc(Assembler::equal, L_wrapup);
 691     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 692     __ jcc(Assembler::equal, L_wrapup);
 693     // vzeroupper() will use a pre-computed instruction sequence that we
 694     // can't compute until after we've determined CPU capabilities. Use
 695     // uncached variant here directly to be able to bootstrap correctly
 696     __ vzeroupper_uncached();
 697 #   undef __
 698   }
 699   address generate_detect_virt() {
 700     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 701 #   define __ _masm->
 702 
 703     address start = __ pc();
 704 
 705     // Evacuate callee-saved registers
 706     __ push(rbp);
 707     __ push(rbx);
 708     __ push(rsi); // for Windows
 709 
 710     __ mov(rax, c_rarg0); // CPUID leaf
 711     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 712 
 713     __ cpuid();
 714 
 715     // Store result to register array
 716     __ movl(Address(rsi,  0), rax);
 717     __ movl(Address(rsi,  4), rbx);
 718     __ movl(Address(rsi,  8), rcx);
 719     __ movl(Address(rsi, 12), rdx);
 720 
 721     // Epilogue
 722     __ pop(rsi);
 723     __ pop(rbx);
 724     __ pop(rbp);
 725     __ ret(0);
 726 
 727 #   undef __
 728 
 729     return start;
 730   };
 731 
 732 
 733   address generate_getCPUIDBrandString(void) {
 734     // Flags to test CPU type.
 735     const uint32_t HS_EFL_AC           = 0x40000;
 736     const uint32_t HS_EFL_ID           = 0x200000;
 737     // Values for when we don't have a CPUID instruction.
 738     const int      CPU_FAMILY_SHIFT = 8;
 739     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 740     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 741 
 742     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 743 
 744     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 745 #   define __ _masm->
 746 
 747     address start = __ pc();
 748 
 749     //
 750     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 751     //
 752     // rcx and rdx are first and second argument registers on windows
 753 
 754     __ push(rbp);
 755     __ mov(rbp, c_rarg0); // cpuid_info address
 756     __ push(rbx);
 757     __ push(rsi);
 758     __ pushf();          // preserve rbx, and flags
 759     __ pop(rax);
 760     __ push(rax);
 761     __ mov(rcx, rax);
 762     //
 763     // if we are unable to change the AC flag, we have a 386
 764     //
 765     __ xorl(rax, HS_EFL_AC);
 766     __ push(rax);
 767     __ popf();
 768     __ pushf();
 769     __ pop(rax);
 770     __ cmpptr(rax, rcx);
 771     __ jccb(Assembler::notEqual, detect_486);
 772 
 773     __ movl(rax, CPU_FAMILY_386);
 774     __ jmp(done);
 775 
 776     //
 777     // If we are unable to change the ID flag, we have a 486 which does
 778     // not support the "cpuid" instruction.
 779     //
 780     __ bind(detect_486);
 781     __ mov(rax, rcx);
 782     __ xorl(rax, HS_EFL_ID);
 783     __ push(rax);
 784     __ popf();
 785     __ pushf();
 786     __ pop(rax);
 787     __ cmpptr(rcx, rax);
 788     __ jccb(Assembler::notEqual, detect_586);
 789 
 790     __ bind(cpu486);
 791     __ movl(rax, CPU_FAMILY_486);
 792     __ jmp(done);
 793 
 794     //
 795     // At this point, we have a chip which supports the "cpuid" instruction
 796     //
 797     __ bind(detect_586);
 798     __ xorl(rax, rax);
 799     __ cpuid();
 800     __ orl(rax, rax);
 801     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 802                                         // value of at least 1, we give up and
 803                                         // assume a 486
 804 
 805     //
 806     // Extended cpuid(0x80000000) for processor brand string detection
 807     //
 808     __ bind(ext_cpuid);
 809     __ movl(rax, CPUID_EXTENDED_FN);
 810     __ cpuid();
 811     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 812     __ jcc(Assembler::below, done);
 813 
 814     //
 815     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 816     //
 817     __ movl(rax, CPUID_EXTENDED_FN_2);
 818     __ cpuid();
 819     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 820     __ movl(Address(rsi, 0), rax);
 821     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 822     __ movl(Address(rsi, 0), rbx);
 823     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 824     __ movl(Address(rsi, 0), rcx);
 825     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 826     __ movl(Address(rsi,0), rdx);
 827 
 828     //
 829     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 830     //
 831     __ movl(rax, CPUID_EXTENDED_FN_3);
 832     __ cpuid();
 833     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 834     __ movl(Address(rsi, 0), rax);
 835     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 836     __ movl(Address(rsi, 0), rbx);
 837     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 838     __ movl(Address(rsi, 0), rcx);
 839     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 840     __ movl(Address(rsi,0), rdx);
 841 
 842     //
 843     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 844     //
 845     __ movl(rax, CPUID_EXTENDED_FN_4);
 846     __ cpuid();
 847     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 848     __ movl(Address(rsi, 0), rax);
 849     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 850     __ movl(Address(rsi, 0), rbx);
 851     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 852     __ movl(Address(rsi, 0), rcx);
 853     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 854     __ movl(Address(rsi,0), rdx);
 855 
 856     //
 857     // return
 858     //
 859     __ bind(done);
 860     __ popf();
 861     __ pop(rsi);
 862     __ pop(rbx);
 863     __ pop(rbp);
 864     __ ret(0);
 865 
 866 #   undef __
 867 
 868     return start;
 869   };
 870 };
 871 
 872 void VM_Version::get_processor_features() {
 873 
 874   _cpu = 4; // 486 by default
 875   _model = 0;
 876   _stepping = 0;
 877   _logical_processors_per_package = 1;
 878   // i486 internal cache is both I&D and has a 16-byte line size
 879   _L1_data_cache_line_size = 16;
 880 
 881   // Get raw processor info
 882 
 883   get_cpu_info_stub(&_cpuid_info);
 884 
 885   assert_is_initialized();
 886   _cpu = extended_cpu_family();
 887   _model = extended_cpu_model();
 888   _stepping = cpu_stepping();
 889 
 890   if (cpu_family() > 4) { // it supports CPUID
 891     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 892     _cpu_features = _features; // Preserve features
 893     // Logical processors are only available on P4s and above,
 894     // and only if hyperthreading is available.
 895     _logical_processors_per_package = logical_processor_count();
 896     _L1_data_cache_line_size = L1_line_size();
 897   }
 898 
 899   // xchg and xadd instructions
 900   _supports_atomic_getset4 = true;
 901   _supports_atomic_getadd4 = true;
 902   _supports_atomic_getset8 = true;
 903   _supports_atomic_getadd8 = true;
 904 
 905   // assigning this field effectively enables Unsafe.writebackMemory()
 906   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 907   // that is only implemented on x86_64 and only if the OS plays ball
 908   if (os::supports_map_sync()) {
 909     // publish data cache line flush size to generic field, otherwise
 910     // let it default to zero, thereby disabling writeback
 911     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 912   }
 913 
 914   // Check if processor has Intel Ecore
 915   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 916     (supports_hybrid() ||
 917      _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
 918      _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
 919     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 920   }
 921 
 922   if (UseSSE < 4) {
 923     clear_feature(CPU_SSE4_1);
 924     clear_feature(CPU_SSE4_2);
 925   }
 926 
 927   if (UseSSE < 3) {
 928     clear_feature(CPU_SSE3);
 929     clear_feature(CPU_SSSE3);
 930     clear_feature(CPU_SSE4A);
 931   }
 932 
 933   // ZX cpus specific settings
 934   if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
 935     if (cpu_family() == 7) {
 936       if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
 937         UseAVX = 1;
 938       } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
 939         UseAVX = 0;
 940       }
 941     } else if (cpu_family() == 6) {
 942       UseAVX = 0;
 943     }
 944   }
 945 
 946   // UseSSE is set to the smaller of what hardware supports and what
 947   // the command line requires. i.e., you cannot set UseSSE to 4 on
 948   // older systems which do not support it.
 949   int use_sse_limit = 2;
 950   if (UseSSE > 3 && supports_sse4_1()) {
 951     use_sse_limit = 4;
 952   } else if (UseSSE > 2 && supports_sse3()) {
 953     use_sse_limit = 3;
 954   }
 955   if (FLAG_IS_DEFAULT(UseSSE)) {
 956     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 957   } else if (UseSSE > use_sse_limit) {
 958     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 959     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 960   }
 961 
 962   // first try initial setting and detect what we can support
 963   int use_avx_limit = 0;
 964   if (UseAVX > 0) {
 965     if (UseSSE < 4) {
 966       // Don't use AVX if SSE is unavailable or has been disabled.
 967       use_avx_limit = 0;
 968     } else if (UseAVX > 2 && supports_evex()) {
 969       use_avx_limit = 3;
 970     } else if (UseAVX > 1 && supports_avx2()) {
 971       use_avx_limit = 2;
 972     } else if (UseAVX > 0 && supports_avx()) {
 973       use_avx_limit = 1;
 974     } else {
 975       use_avx_limit = 0;
 976     }
 977   }
 978   if (FLAG_IS_DEFAULT(UseAVX)) {
 979     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 980     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 981       FLAG_SET_DEFAULT(UseAVX, 2);
 982     } else {
 983       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 984     }
 985   }
 986 
 987   if (UseAVX > use_avx_limit) {
 988     if (UseSSE < 4) {
 989       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 990     } else {
 991       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 992     }
 993     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 994   }
 995 
 996   if (UseAVX < 3) {
 997     clear_feature(CPU_AVX512F);
 998     clear_feature(CPU_AVX512DQ);
 999     clear_feature(CPU_AVX512CD);
1000     clear_feature(CPU_AVX512BW);
1001     clear_feature(CPU_AVX512ER);
1002     clear_feature(CPU_AVX512PF);
1003     clear_feature(CPU_AVX512VL);
1004     clear_feature(CPU_AVX512_VPOPCNTDQ);
1005     clear_feature(CPU_AVX512_VPCLMULQDQ);
1006     clear_feature(CPU_AVX512_VAES);
1007     clear_feature(CPU_AVX512_VNNI);
1008     clear_feature(CPU_AVX512_VBMI);
1009     clear_feature(CPU_AVX512_VBMI2);
1010     clear_feature(CPU_AVX512_BITALG);
1011     clear_feature(CPU_AVX512_IFMA);
1012     clear_feature(CPU_APX_F);
1013     clear_feature(CPU_AVX512_FP16);
1014     clear_feature(CPU_AVX10_1);
1015     clear_feature(CPU_AVX10_2);
1016   }
1017 
1018 
1019   if (UseAVX < 2) {
1020     clear_feature(CPU_AVX2);
1021     clear_feature(CPU_AVX_IFMA);
1022   }
1023 
1024   if (UseAVX < 1) {
1025     clear_feature(CPU_AVX);
1026     clear_feature(CPU_VZEROUPPER);
1027     clear_feature(CPU_F16C);
1028     clear_feature(CPU_SHA512);
1029   }
1030 
1031   if (logical_processors_per_package() == 1) {
1032     // HT processor could be installed on a system which doesn't support HT.
1033     clear_feature(CPU_HT);
1034   }
1035 
1036   if (is_intel()) { // Intel cpus specific settings
1037     if (is_knights_family()) {
1038       clear_feature(CPU_VZEROUPPER);
1039       clear_feature(CPU_AVX512BW);
1040       clear_feature(CPU_AVX512VL);
1041       clear_feature(CPU_APX_F);
1042       clear_feature(CPU_AVX512DQ);
1043       clear_feature(CPU_AVX512_VNNI);
1044       clear_feature(CPU_AVX512_VAES);
1045       clear_feature(CPU_AVX512_VPOPCNTDQ);
1046       clear_feature(CPU_AVX512_VPCLMULQDQ);
1047       clear_feature(CPU_AVX512_VBMI);
1048       clear_feature(CPU_AVX512_VBMI2);
1049       clear_feature(CPU_CLWB);
1050       clear_feature(CPU_FLUSHOPT);
1051       clear_feature(CPU_GFNI);
1052       clear_feature(CPU_AVX512_BITALG);
1053       clear_feature(CPU_AVX512_IFMA);
1054       clear_feature(CPU_AVX_IFMA);
1055       clear_feature(CPU_AVX512_FP16);
1056       clear_feature(CPU_AVX10_1);
1057       clear_feature(CPU_AVX10_2);
1058     }
1059   }
1060 
1061   // Currently APX support is only enabled for targets supporting AVX512VL feature.
1062   if (supports_apx_f() && os_supports_apx_egprs() && supports_avx512vl()) {
1063     if (FLAG_IS_DEFAULT(UseAPX)) {
1064       FLAG_SET_DEFAULT(UseAPX, false); // by default UseAPX is false
1065       clear_feature(CPU_APX_F);
1066     } else if (!UseAPX) {
1067       clear_feature(CPU_APX_F);
1068     }
1069   } else {
1070     if (!os_supports_apx_egprs() || !supports_avx512vl()) {
1071       clear_feature(CPU_APX_F);
1072     }
1073     if (UseAPX) {
1074       if (!FLAG_IS_DEFAULT(UseAPX)) {
1075         warning("APX instructions are not available on this CPU");
1076       }
1077       FLAG_SET_DEFAULT(UseAPX, false);
1078     }
1079   }
1080 
1081   CHECK_CPU_FEATURE(UseCLMUL, CLMUL, supports_clmul(), "CLMUL" MULTI_INST_WARNING_MSG);
1082   CHECK_CPU_FEATURE(UseAES, AES, supports_aes(), "AES" MULTI_INST_WARNING_MSG);
1083   CHECK_CPU_FEATURE(UseFMA, FMA, supports_fma(), "FMA" MULTI_INST_WARNING_MSG);
1084   CHECK_CPU_FEATURE(UseCountLeadingZerosInstruction, LZCNT, supports_lzcnt(), "lzcnt" SINGLE_INST_WARNING_MSG);
1085   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1086   // VEX prefix is generated only when AVX > 0.
1087   CHECK_CPU_FEATURE(UseBMI1Instructions, BMI1, supports_bmi1(), "BMI1" MULTI_INST_WARNING_MSG);
1088 
1089   if (supports_bmi2() && supports_avx()) {
1090     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1091       FLAG_SET_DEFAULT(UseBMI2Instructions, true);
1092     } else if (!UseBMI2Instructions) {
1093       clear_feature(CPU_BMI2);
1094     }
1095   } else {
1096     if (!supports_avx()) {
1097       clear_feature(CPU_BMI2);
1098     }
1099     if (UseBMI2Instructions) {
1100       if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1101         warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1102       }
1103       FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1104     }
1105   }
1106 
1107   CHECK_CPU_FEATURE(UsePopCountInstruction, POPCNT, supports_popcnt(), "popcnt" SINGLE_INST_WARNING_MSG);
1108   CHECK_CPU_FEATURE(UseSHA, SHA, supports_sha() || (supports_avx2() && supports_bmi2()), "SHA" MULTI_INST_WARNING_MSG);
1109 
1110   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1111     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1112     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1113   } else {
1114     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1115   }
1116 
1117   if (X86ICacheSync == -1) {
1118     // Auto-detect, choosing the best-performing option that still flushes
1119     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1120     if (supports_clwb()) {
1121       FLAG_SET_ERGO(X86ICacheSync, 3);
1122     } else if (supports_clflushopt()) {
1123       FLAG_SET_ERGO(X86ICacheSync, 2);
1124     } else {
1125       FLAG_SET_ERGO(X86ICacheSync, 1);
1126     }
1127   } else {
1128     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1129       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1130     }
1131     if ((X86ICacheSync == 3) && !supports_clwb()) {
1132       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1133     }
1134     if ((X86ICacheSync == 5) && !supports_serialize()) {
1135       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1136     }
1137   }
1138 
1139   stringStream ss(2048);
1140   if (supports_hybrid()) {
1141     ss.print("(hybrid)");
1142   } else {
1143     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1144   }
1145   ss.print(" family %d model %d stepping %d microcode 0x%x",
1146            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1147   ss.print(", ");
1148   int features_offset = (int)ss.size();
1149   insert_features_names(_features, ss);
1150 
1151   _cpu_info_string = ss.as_string(true);
1152   _features_string = _cpu_info_string + features_offset;
1153 
1154   // Use AES instructions if available.
1155   if (supports_aes()) {
1156     if (supports_sse3()) {
1157       if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1158         FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1159       }
1160     } else if (UseAESIntrinsics) {
1161       // The AES intrinsic stubs require AES instruction support (of course)
1162       // but also require SSE3 mode or higher for the instructions they use.
1163       if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1164         warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1165       }
1166       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1167     }
1168     if (!UseAESIntrinsics) {
1169       if (UseAESCTRIntrinsics) {
1170         if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1171           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1172         }
1173         FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1174       }
1175     } else {
1176       if (supports_sse4_1()) {
1177         if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1178           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1179         }
1180       } else if (UseAESCTRIntrinsics) {
1181         // The AES-CTR intrinsic stubs require AES instruction support (of course)
1182         // but also require SSE4.1 mode or higher for the instructions they use.
1183         if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1184           warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1185         }
1186         FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1187       }
1188     }
1189   } else {
1190     if (!cpu_supports_aes()) {
1191       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1192         warning("AES intrinsics are not available on this CPU");
1193       }
1194       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1195       if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1196         warning("AES-CTR intrinsics are not available on this CPU");
1197       }
1198       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1199     } else if (!UseAES) {
1200       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1201         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1202       }
1203       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1204       if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1205         warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1206       }
1207       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1208     }
1209   }
1210 
1211   if (UseCLMUL && (UseSSE > 2)) {
1212     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1213       UseCRC32Intrinsics = true;
1214     }
1215   } else if (UseCRC32Intrinsics) {
1216     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1217       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1218     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1219   }
1220 
1221   if (supports_avx2()) {
1222     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1223       UseAdler32Intrinsics = true;
1224     }
1225   } else if (UseAdler32Intrinsics) {
1226     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1227       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1228     }
1229     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1230   }
1231 
1232   if (supports_sse4_2() && supports_clmul()) {
1233     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1234       UseCRC32CIntrinsics = true;
1235     }
1236   } else if (UseCRC32CIntrinsics) {
1237     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1238       warning("CRC32C intrinsics are not available on this CPU");
1239     }
1240     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1241   }
1242 
1243   // GHASH/GCM intrinsics
1244   if (UseCLMUL && (UseSSE > 2)) {
1245     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1246       UseGHASHIntrinsics = true;
1247     }
1248   } else if (UseGHASHIntrinsics) {
1249     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1250       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1251     }
1252     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1253   }
1254 
1255   // ChaCha20 Intrinsics
1256   // As long as the system supports AVX as a baseline we can do a
1257   // SIMD-enabled block function.  StubGenerator makes the determination
1258   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1259   // version.
1260   if (UseAVX >= 1) {
1261     if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1262       UseChaCha20Intrinsics = true;
1263     }
1264   } else if (UseChaCha20Intrinsics) {
1265     if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1266       warning("ChaCha20 intrinsic requires AVX instructions");
1267     }
1268     FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1269   }
1270 
1271   // Kyber Intrinsics
1272   // Currently we only have them for AVX512
1273   if (supports_evex() && supports_avx512bw()) {
1274     if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1275       UseKyberIntrinsics = true;
1276     }
1277   } else if (UseKyberIntrinsics) {
1278     if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1279       warning("Intrinsics for ML-KEM are not available on this CPU.");
1280     }
1281     FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1282   }
1283 
1284   // Dilithium Intrinsics
1285   if (UseAVX > 1) {
1286       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1287           UseDilithiumIntrinsics = true;
1288       }
1289   } else if (UseDilithiumIntrinsics) {
1290     if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1291       warning("Intrinsics for ML-DSA are not available on this CPU.");
1292     }
1293     FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1294   }
1295 
1296   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1297   if (UseAVX >= 2) {
1298     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1299       UseBASE64Intrinsics = true;
1300     }
1301   } else if (UseBASE64Intrinsics) {
1302     if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1303       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1304     }
1305     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1306   }
1307 
1308   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1309     UseMD5Intrinsics = true;
1310   }
1311 
1312   if (supports_sha() && supports_sse4_1() && UseSHA) {
1313     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1314       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1315     }
1316   } else if (UseSHA1Intrinsics) {
1317     if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1318       warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1319     }
1320     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1321   }
1322 
1323   if (supports_sse4_1() && UseSHA) {
1324     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1325       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1326     }
1327   } else if (UseSHA256Intrinsics) {
1328     if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1329       warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1330     }
1331     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1332   }
1333 
1334   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1335     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1336       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1337     }
1338   } else if (UseSHA512Intrinsics) {
1339     if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1340       warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1341     }
1342     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1343   }
1344 
1345   if (UseSHA && ((supports_evex() && supports_avx512vlbw()) ||
1346       (EnableX86ECoreOpts && !supports_hybrid()))) {
1347     if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1348       FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1349     }
1350   } else if (UseSHA3Intrinsics) {
1351     if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1352       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1353     }
1354     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1355   }
1356 
1357 #ifdef COMPILER2
1358   int max_vector_size = 0;
1359   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1360     // 16 byte vectors (in XMM) are supported with SSE2+
1361     max_vector_size = 16;
1362   } else if (UseAVX == 1 || UseAVX == 2) {
1363     // 32 bytes vectors (in YMM) are only supported with AVX+
1364     max_vector_size = 32;
1365   } else if (UseAVX > 2) {
1366     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1367     max_vector_size = 64;
1368   }
1369 
1370   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1371 
1372   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1373     if (MaxVectorSize < min_vector_size) {
1374       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1375       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1376     }
1377     if (MaxVectorSize > max_vector_size) {
1378       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1379       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1380     }
1381     if (!is_power_of_2(MaxVectorSize)) {
1382       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1383       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1384     }
1385   } else {
1386     // If default, use highest supported configuration
1387     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1388   }
1389 
1390 #ifdef ASSERT
1391   if (MaxVectorSize > 0) {
1392     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1393       tty->print_cr("State of YMM registers after signal handle:");
1394       int nreg = 4;
1395       const char* ymm_name[4] = {"0", "7", "8", "15"};
1396       for (int i = 0; i < nreg; i++) {
1397         tty->print("YMM%s:", ymm_name[i]);
1398         for (int j = 7; j >=0; j--) {
1399           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1400         }
1401         tty->cr();
1402       }
1403     }
1404   }
1405 #endif // ASSERT
1406 
1407   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1408     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1409       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1410     }
1411   } else if (UsePoly1305Intrinsics) {
1412     if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1413       warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1414     }
1415     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1416   }
1417 
1418   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1419     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1420       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1421     }
1422   } else if (UseIntPolyIntrinsics) {
1423     if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1424       warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1425     }
1426     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1427   }
1428 
1429   if (FLAG_IS_DEFAULT(UseIntPoly25519Intrinsics)) {
1430     UseIntPoly25519Intrinsics = true;
1431   }
1432 
1433   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1434     UseMultiplyToLenIntrinsic = true;
1435   }
1436   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1437     UseSquareToLenIntrinsic = true;
1438   }
1439   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1440     UseMulAddIntrinsic = true;
1441   }
1442   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1443     UseMontgomeryMultiplyIntrinsic = true;
1444   }
1445   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1446     UseMontgomerySquareIntrinsic = true;
1447   }
1448 #endif // COMPILER2
1449 
1450   // On new cpus instructions which update whole XMM register should be used
1451   // to prevent partial register stall due to dependencies on high half.
1452   //
1453   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1454   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1455   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1456   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1457 
1458 
1459   if (is_zx()) { // ZX cpus specific settings
1460     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1461       UseStoreImmI16 = false; // don't use it on ZX cpus
1462     }
1463     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1464       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1465         // Use it on all ZX cpus
1466         UseAddressNop = true;
1467       }
1468     }
1469     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1470       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1471     }
1472     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1473       if (supports_sse3()) {
1474         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1475       } else {
1476         UseXmmRegToRegMoveAll = false;
1477       }
1478     }
1479     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1480 #ifdef COMPILER2
1481       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1482         // For new ZX cpus do the next optimization:
1483         // don't align the beginning of a loop if there are enough instructions
1484         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1485         // in current fetch line (OptoLoopAlignment) or the padding
1486         // is big (> MaxLoopPad).
1487         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1488         // generated NOP instructions. 11 is the largest size of one
1489         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1490         MaxLoopPad = 11;
1491       }
1492 #endif // COMPILER2
1493       if (supports_sse4_2()) { // new ZX cpus
1494         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1495           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1496         }
1497       }
1498     }
1499 
1500     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1501       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1502     }
1503   }
1504 
1505   if (is_amd_family()) { // AMD cpus specific settings
1506     if (FLAG_IS_DEFAULT(UseAddressNop)) {
1507       // Use it on new AMD cpus starting from Opteron.
1508       UseAddressNop = true;
1509     }
1510     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1511       if (supports_sse4a()) {
1512         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1513       } else {
1514         UseXmmLoadAndClearUpper = false;
1515       }
1516     }
1517     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1518       if (supports_sse4a()) {
1519         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1520       } else {
1521         UseXmmRegToRegMoveAll = false;
1522       }
1523     }
1524     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1525       if (supports_sse4a()) {
1526         UseXmmI2F = true;
1527       } else {
1528         UseXmmI2F = false;
1529       }
1530     }
1531     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1532       if (supports_sse4a()) {
1533         UseXmmI2D = true;
1534       } else {
1535         UseXmmI2D = false;
1536       }
1537     }
1538 
1539     // some defaults for AMD family 15h
1540     if (cpu_family() == 0x15) {
1541       // On family 15h processors default is no sw prefetch
1542       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1543         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1544       }
1545       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1546       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1547         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1548       }
1549       if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1550         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1551       }
1552     }
1553 
1554 #ifdef COMPILER2
1555     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1556       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1557       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1558     }
1559 #endif // COMPILER2
1560 
1561     // Some defaults for AMD family >= 17h && Hygon family 18h
1562     if (cpu_family() >= 0x17) {
1563       // On family >=17h processors use XMM and UnalignedLoadStores
1564       // for Array Copy
1565       if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1566         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1567       }
1568 #ifdef COMPILER2
1569       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1570         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1571       }
1572 #endif
1573     }
1574   }
1575 
1576   if (is_intel()) { // Intel cpus specific settings
1577     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1578       UseStoreImmI16 = false; // don't use it on Intel cpus
1579     }
1580     if (is_intel_server_family() || cpu_family() == 15) {
1581       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1582         // Use it on all Intel cpus starting from PentiumPro
1583         UseAddressNop = true;
1584       }
1585     }
1586     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1587       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1588     }
1589     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1590       if (supports_sse3()) {
1591         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1592       } else {
1593         UseXmmRegToRegMoveAll = false;
1594       }
1595     }
1596     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1597 #ifdef COMPILER2
1598       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1599         // For new Intel cpus do the next optimization:
1600         // don't align the beginning of a loop if there are enough instructions
1601         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1602         // in current fetch line (OptoLoopAlignment) or the padding
1603         // is big (> MaxLoopPad).
1604         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1605         // generated NOP instructions. 11 is the largest size of one
1606         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1607         MaxLoopPad = 11;
1608       }
1609 #endif // COMPILER2
1610 
1611       if (is_intel_modern_cpu()) { // Newest Intel cpus
1612         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1613           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1614         }
1615       }
1616     }
1617     if (is_atom_family() || is_knights_family()) {
1618 #ifdef COMPILER2
1619       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1620         OptoScheduling = true;
1621       }
1622 #endif
1623       if (supports_sse4_2()) { // Silvermont
1624         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1625           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1626         }
1627       }
1628       if (FLAG_IS_DEFAULT(UseIncDec)) {
1629         FLAG_SET_DEFAULT(UseIncDec, false);
1630       }
1631     }
1632     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1633       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1634     }
1635   }
1636 
1637 #ifdef COMPILER2
1638   if (UseAVX > 2) {
1639     if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1640         (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1641          ArrayOperationPartialInlineSize != 0 &&
1642          ArrayOperationPartialInlineSize != 16 &&
1643          ArrayOperationPartialInlineSize != 32 &&
1644          ArrayOperationPartialInlineSize != 64)) {
1645       int inline_size = 0;
1646       if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1647         inline_size = 64;
1648       } else if (MaxVectorSize >= 32) {
1649         inline_size = 32;
1650       } else if (MaxVectorSize >= 16) {
1651         inline_size = 16;
1652       }
1653       if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1654         warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1655       }
1656       ArrayOperationPartialInlineSize = inline_size;
1657     }
1658 
1659     if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1660       ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1661       if (ArrayOperationPartialInlineSize) {
1662         warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1663       } else {
1664         warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1665       }
1666     }
1667   }
1668 
1669   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1670     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1671       OptimizeFill = false;
1672     }
1673   }
1674 #endif
1675   if (supports_sse4_2()) {
1676     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1677       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1678     }
1679   } else if (UseSSE42Intrinsics) {
1680     if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1681       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1682     }
1683     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1684   }
1685   if (UseSSE42Intrinsics) {
1686     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1687       UseVectorizedMismatchIntrinsic = true;
1688     }
1689   } else if (UseVectorizedMismatchIntrinsic) {
1690     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1691       warning("vectorizedMismatch intrinsics are not available on this CPU");
1692     }
1693     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1694   }
1695   if (UseAVX >= 2) {
1696     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1697   } else if (UseVectorizedHashCodeIntrinsic) {
1698     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1699       warning("vectorizedHashCode intrinsics are not available on this CPU");
1700     }
1701     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1702   }
1703 
1704   // Use count trailing zeros instruction if available
1705   if (supports_bmi1()) {
1706     // tzcnt does not require VEX prefix
1707     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1708       UseCountTrailingZerosInstruction = true;
1709     }
1710   } else if (UseCountTrailingZerosInstruction) {
1711     if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1712       warning("tzcnt instruction is not available on this CPU");
1713     }
1714     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1715   }
1716 
1717   // Use fast-string operations if available.
1718   if (supports_erms()) {
1719     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1720       UseFastStosb = true;
1721     }
1722   } else if (UseFastStosb) {
1723     if (!FLAG_IS_DEFAULT(UseFastStosb)) {
1724       warning("fast-string operations are not available on this CPU");
1725     }
1726     FLAG_SET_DEFAULT(UseFastStosb, false);
1727   }
1728 
1729   // For AMD Processors use XMM/YMM MOVDQU instructions
1730   // for Object Initialization as default
1731   if (is_amd() && cpu_family() >= 0x19) {
1732     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1733       UseFastStosb = false;
1734     }
1735   }
1736 
1737 #ifdef COMPILER2
1738   if (is_intel() && MaxVectorSize > 16) {
1739     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1740       UseFastStosb = false;
1741     }
1742   }
1743 #endif
1744 
1745   // Use XMM/YMM MOVDQU instruction for Object Initialization
1746   if (!UseFastStosb && UseUnalignedLoadStores) {
1747     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1748       UseXMMForObjInit = true;
1749     }
1750   } else if (UseXMMForObjInit) {
1751     if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1752       warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1753     }
1754     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1755   }
1756 
1757 #ifdef COMPILER2
1758   if (FLAG_IS_DEFAULT(AlignVector)) {
1759     // Modern processors allow misaligned memory operations for vectors.
1760     AlignVector = !UseUnalignedLoadStores;
1761   }
1762 #endif // COMPILER2
1763 
1764   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1765     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1766       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1767     }
1768   }
1769 
1770   // Allocation prefetch settings
1771   int cache_line_size = checked_cast<int>(prefetch_data_size());
1772   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1773       (cache_line_size > AllocatePrefetchStepSize)) {
1774     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1775   }
1776 
1777   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1778     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1779     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1780       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1781     }
1782     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1783   }
1784 
1785   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1786     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1787     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1788   }
1789 
1790   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1791     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1792         is_intel_modern_cpu()) { // Nehalem based cpus
1793       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1794     }
1795 #ifdef COMPILER2
1796     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1797       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1798     }
1799 #endif
1800   }
1801 
1802   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1803 #ifdef COMPILER2
1804     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1805       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1806     }
1807 #endif
1808   }
1809 
1810   // Prefetch settings
1811 
1812   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1813   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1814   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1815   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1816 
1817   // gc copy/scan is disabled if prefetchw isn't supported, because
1818   // Prefetch::write emits an inlined prefetchw on Linux.
1819   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1820   // The used prefetcht0 instruction works for both amd64 and em64t.
1821 
1822   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1823     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1824   }
1825   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1826     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1827   }
1828 
1829   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1830      (cache_line_size > ContendedPaddingWidth))
1831     ContendedPaddingWidth = cache_line_size;
1832 
1833   // This machine allows unaligned memory accesses
1834   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1835     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1836   }
1837 
1838 #ifndef PRODUCT
1839   if (log_is_enabled(Info, os, cpu)) {
1840     LogStream ls(Log(os, cpu)::info());
1841     outputStream* log = &ls;
1842     log->print_cr("Logical CPUs per core: %u",
1843                   logical_processors_per_package());
1844     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1845     log->print("UseSSE=%d", UseSSE);
1846     if (UseAVX > 0) {
1847       log->print("  UseAVX=%d", UseAVX);
1848     }
1849     if (UseAES) {
1850       log->print("  UseAES=1");
1851     }
1852 #ifdef COMPILER2
1853     if (MaxVectorSize > 0) {
1854       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1855     }
1856 #endif
1857     log->cr();
1858     log->print("Allocation");
1859     if (AllocatePrefetchStyle <= 0) {
1860       log->print_cr(": no prefetching");
1861     } else {
1862       log->print(" prefetching: ");
1863       if (AllocatePrefetchInstr == 0) {
1864         log->print("PREFETCHNTA");
1865       } else if (AllocatePrefetchInstr == 1) {
1866         log->print("PREFETCHT0");
1867       } else if (AllocatePrefetchInstr == 2) {
1868         log->print("PREFETCHT2");
1869       } else if (AllocatePrefetchInstr == 3) {
1870         log->print("PREFETCHW");
1871       }
1872       if (AllocatePrefetchLines > 1) {
1873         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1874       } else {
1875         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1876       }
1877     }
1878 
1879     if (PrefetchCopyIntervalInBytes > 0) {
1880       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1881     }
1882     if (PrefetchScanIntervalInBytes > 0) {
1883       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1884     }
1885     if (ContendedPaddingWidth > 0) {
1886       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1887     }
1888   }
1889 #endif // !PRODUCT
1890   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1891       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1892   }
1893   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1894       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1895   }
1896   // CopyAVX3Threshold is the threshold at which 64-byte vector instructions
1897   // are used for implementing the array copy, fill and clear operations.
1898   // The Intel platforms that support the serialize instruction and the AMD
1899   // platforms with native 512-bit datapath have improved implementation of
1900   // 64-byte load/stores and so the default threshold is set to 0 for these
1901   // platforms.
1902   if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
1903     if (is_intel() && is_intel_server_family() && supports_serialize()) {
1904       FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1905     } else if (is_amd() && is_amd_avx512_datapath_server_family()) {
1906       FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1907     } else {
1908       FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
1909     }
1910   }
1911 }
1912 
1913 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1914   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1915   if (vrt == XenHVM) {
1916     st->print_cr("Xen hardware-assisted virtualization detected");
1917   } else if (vrt == KVM) {
1918     st->print_cr("KVM virtualization detected");
1919   } else if (vrt == VMWare) {
1920     st->print_cr("VMWare virtualization detected");
1921     VirtualizationSupport::print_virtualization_info(st);
1922   } else if (vrt == HyperV) {
1923     st->print_cr("Hyper-V virtualization detected");
1924   } else if (vrt == HyperVRole) {
1925     st->print_cr("Hyper-V role detected");
1926   }
1927 }
1928 
1929 bool VM_Version::compute_has_intel_jcc_erratum() {
1930   if (!is_intel_family_core()) {
1931     // Only Intel CPUs are affected.
1932     return false;
1933   }
1934   // The following table of affected CPUs is based on the following document released by Intel:
1935   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1936   switch (_model) {
1937   case 0x8E:
1938     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1939     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1940     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1941     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1942     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1943     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1944     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1945     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1946     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1947     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1948   case 0x4E:
1949     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1950     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1951     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1952     return _stepping == 0x3;
1953   case 0x55:
1954     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1955     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1956     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1957     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1958     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1959     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1960     return _stepping == 0x4 || _stepping == 0x7;
1961   case 0x5E:
1962     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1963     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1964     return _stepping == 0x3;
1965   case 0x9E:
1966     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1967     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1968     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1969     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1970     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1971     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1972     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1973     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1974     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1975     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1976     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1977     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1978     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1979     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1980     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1981   case 0xA5:
1982     // Not in Intel documentation.
1983     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1984     return true;
1985   case 0xA6:
1986     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1987     return _stepping == 0x0;
1988   case 0xAE:
1989     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1990     return _stepping == 0xA;
1991   default:
1992     // If we are running on another intel machine not recognized in the table, we are okay.
1993     return false;
1994   }
1995 }
1996 
1997 // On Xen, the cpuid instruction returns
1998 //  eax / registers[0]: Version of Xen
1999 //  ebx / registers[1]: chars 'XenV'
2000 //  ecx / registers[2]: chars 'MMXe'
2001 //  edx / registers[3]: chars 'nVMM'
2002 //
2003 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2004 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2005 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2006 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2007 //
2008 // more information :
2009 // https://kb.vmware.com/s/article/1009458
2010 //
2011 void VM_Version::check_virtualizations() {
2012   uint32_t registers[4] = {0};
2013   char signature[13] = {0};
2014 
2015   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2016   // from 0x40000000 until 0x40010000.
2017   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2018   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2019     detect_virt_stub(leaf, registers);
2020     memcpy(signature, &registers[1], 12);
2021 
2022     if (strncmp("VMwareVMware", signature, 12) == 0) {
2023       Abstract_VM_Version::_detected_virtualization = VMWare;
2024       // check for extended metrics from guestlib
2025       VirtualizationSupport::initialize();
2026     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2027       Abstract_VM_Version::_detected_virtualization = HyperV;
2028 #ifdef _WINDOWS
2029       // CPUID leaf 0x40000007 is available to the root partition only.
2030       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2031       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2032       detect_virt_stub(0x40000007, registers);
2033       if ((registers[0] != 0x0) ||
2034           (registers[1] != 0x0) ||
2035           (registers[2] != 0x0) ||
2036           (registers[3] != 0x0)) {
2037         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2038       }
2039 #endif
2040     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2041       Abstract_VM_Version::_detected_virtualization = KVM;
2042     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2043       Abstract_VM_Version::_detected_virtualization = XenHVM;
2044     }
2045   }
2046 }
2047 
#ifdef COMPILER2
// Answers whether the VM runs on Cascade Lake with default options, i.e.
// neither UseAVX nor MaxVectorSize was set explicitly and UseAVX is above 2.
bool VM_Version::is_default_intel_cascade_lake() {
  if (!FLAG_IS_DEFAULT(UseAVX)) {
    return false;
  }
  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  if (UseAVX <= 2) {
    return false;
  }
  return is_intel_cascade_lake();
}
#endif
2057 
2058 bool VM_Version::is_intel_cascade_lake() {
2059   return is_intel_skylake() && _stepping >= 5;
2060 }
2061 
2062 bool VM_Version::is_intel_darkmont() {
2063   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2064 }
2065 
// Invokes the generated clear_apx_test_state stub. The stub pointer is
// assigned in VM_Version::initialize(), so this must not be called earlier.
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2069 
// Set to true as the last step of VM_Version::initialize().
static bool _vm_version_initialized = false;

// One-time setup of VM_Version:
//  1. allocates the code blob that holds the VM_Version stubs (exits the VM
//     if the allocation fails),
//  2. generates the cpu-info, virtualization-detection, APX-test-state and
//     brand-string stubs and stores their entry points,
//  3. runs get_processor_features() and Assembler::precompute_instructions(),
//  4. probes for a hypervisor signature when supports_hv() reports one.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // All four stubs are emitted into the same code buffer.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                     g.generate_getCPUIDBrandString());
  get_processor_features();

  Assembler::precompute_instructions();

  // check_virtualizations() relies on detect_virt_stub generated above.
  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2100 
// CPU family numbers. In this file extended_cpu_family() is compared against
// CPU_FAMILY_PENTIUMPRO and CPU_FAMILY_PENTIUM_4. Each value also equals the
// position of the corresponding non-empty name in _family_id_intel.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
2110 
// Single-bit masks for the extended feature EDX word. INTEL64_FLAG is tested
// against _cpuid_info.ext_cpuid1_edx in cpu_is_em64t(). The bit positions
// match the "RDTSCP" and "Intel 64 Architecture" slots of
// _feature_extended_edx_id.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2115 
// Single-bit feature masks. The bit position of each mask equals the index
// of its description in _feature_edx_id. Bits 10, 20, 30 and 31 have no
// enumerator here.
// NOTE(review): presumably these are the CPUID leaf 1 EDX feature bits --
// confirm against the code that consumes them.
typedef enum {
   FPU_FLAG     = 0x00000001, // bit 0
   VME_FLAG     = 0x00000002, // bit 1
   DE_FLAG      = 0x00000004, // bit 2
   PSE_FLAG     = 0x00000008, // bit 3
   TSC_FLAG     = 0x00000010, // bit 4
   MSR_FLAG     = 0x00000020, // bit 5
   PAE_FLAG     = 0x00000040, // bit 6
   MCE_FLAG     = 0x00000080, // bit 7
   CX8_FLAG     = 0x00000100, // bit 8
   APIC_FLAG    = 0x00000200, // bit 9
   SEP_FLAG     = 0x00000800, // bit 11
   MTRR_FLAG    = 0x00001000, // bit 12
   PGE_FLAG     = 0x00002000, // bit 13
   MCA_FLAG     = 0x00004000, // bit 14
   CMOV_FLAG    = 0x00008000, // bit 15
   PAT_FLAG     = 0x00010000, // bit 16
   PSE36_FLAG   = 0x00020000, // bit 17
   PSNUM_FLAG   = 0x00040000, // bit 18
   CLFLUSH_FLAG = 0x00080000, // bit 19
   DTS_FLAG     = 0x00200000, // bit 21
   ACPI_FLAG    = 0x00400000, // bit 22
   MMX_FLAG     = 0x00800000, // bit 23
   FXSR_FLAG    = 0x01000000, // bit 24
   SSE_FLAG     = 0x02000000, // bit 25
   SSE2_FLAG    = 0x04000000, // bit 26
   SS_FLAG      = 0x08000000, // bit 27
   HTT_FLAG     = 0x10000000, // bit 28
   TM_FLAG      = 0x20000000  // bit 29
} FeatureEdxFlag;
2146 
// VM_Version statics

// Element counts of the _family_id_intel and _family_id_amd name tables.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};
2152 
// NOTE(review): presumably the 12-character CPU vendor string plus the
// terminating NUL -- confirm against the users of this constant.
const size_t VENDOR_LENGTH = 13;
// Size in bytes of the buffer allocated by cpu_brand_string().
// NOTE(review): 3 * 4 * 4 + 1 looks like 3 CPUID leaves x 4 registers x
// 4 bytes plus a terminating NUL -- confirm against cpu_extended_brand_string().
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Lazily allocated cache filled by cpu_brand_string(); nullptr until the
// first successful query.
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

// _no_of_threads is set from os::processor_count() in
// resolve_cpu_information_details().
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2160 
// Intel family names. The position of each non-empty name equals the
// matching FamilyFlag value; positions without a name hold "".
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",     // 0x0
  "",
  "286",           // 0x2
  "386",           // 0x3
  "486",           // 0x4
  "Pentium",       // 0x5
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"      // 0xF
};
2179 
// AMD family names, one slot per position 0x00..0x17; positions without a
// name hold "".
// NOTE(review): presumably indexed by the extended CPU family number --
// confirm against the lookup code.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",               // 0x04
  "K5/K6",              // 0x05
  "Athlon/AthlonXP",    // 0x06
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",   // 0x0F
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"                 // 0x17
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
//
// Model names for family 6 (CPU_FAMILY_PENTIUMPRO), read by
// cpu_model_description() at position extended_cpu_model(). The table has no
// explicit size: the trailing nullptr is the end marker, and any model number
// at or beyond it yields nullptr. Models without a name hold "".
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr                 // end marker
};
2282 
/* Brand ID is kept for backward compatibility.
 * Newer CPUs use the extended brand string instead.
 *
 * Read by cpu_brand() at the position given by the low 8 bits of
 * _cpuid_info.std_cpuid1_ebx. The trailing nullptr is the end marker:
 * any brand number at or beyond it yields nullptr. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2297 
2298 
// Descriptions of the feature bits named by FeatureEdxFlag: the entry at
// position N describes the flag whose mask is (1 << N). Bit positions
// without a description hold "".
const char* const _feature_edx_id[] = {
  "On-Chip FPU",                        // bit 0
  "Virtual Mode Extensions",            // bit 1
  "Debugging Extensions",               // bit 2
  "Page Size Extensions",               // bit 3
  "Time Stamp Counter",                 // bit 4
  "Model Specific Registers",           // bit 5
  "Physical Address Extension",         // bit 6
  "Machine Check Exceptions",           // bit 7
  "CMPXCHG8B Instruction",              // bit 8
  "On-Chip APIC",                       // bit 9
  "",                                   // bit 10
  "Fast System Call",                   // bit 11
  "Memory Type Range Registers",        // bit 12
  "Page Global Enable",                 // bit 13
  "Machine Check Architecture",         // bit 14
  "Conditional Mov Instruction",        // bit 15
  "Page Attribute Table",               // bit 16
  "36-bit Page Size Extension",         // bit 17
  "Processor Serial Number",            // bit 18
  "CLFLUSH Instruction",                // bit 19
  "",                                   // bit 20
  "Debug Trace Store feature",          // bit 21
  "ACPI registers in MSR space",        // bit 22
  "Intel Architecture MMX Technology",  // bit 23
  "Fast Float Point Save and Restore",  // bit 24
  "Streaming SIMD extensions",          // bit 25
  "Streaming SIMD extensions 2",        // bit 26
  "Self-Snoop",                         // bit 27
  "Hyper Threading",                    // bit 28
  "Thermal Monitor",                    // bit 29
  "",                                   // bit 30
  "Pending Break Enable"                // bit 31
};
2333 
// Descriptions for the 32 bit positions of the extended feature EDX word;
// positions without a description hold "". The "RDTSCP" and
// "Intel 64 Architecture" slots sit at the bit positions of RDTSCP_FLAG
// (bit 27) and INTEL64_FLAG (bit 29).
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",         // bit 11
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",    // bit 20
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",                 // bit 27
  "",
  "Intel 64 Architecture",  // bit 29
  "",
  ""
};
2368 
// Descriptions for 32 feature-bit positions, one entry per bit; positions
// without a description hold "".
// NOTE(review): presumably these describe the CPUID leaf 1 ECX feature
// bits -- confirm against the code that prints them.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",               // bit 0
  "PCLMULQDQ",                                 // bit 1
  "64-bit DS Area",                            // bit 2
  "MONITOR/MWAIT instructions",                // bit 3
  "CPL Qualified Debug Store",                 // bit 4
  "Virtual Machine Extensions",                // bit 5
  "Safer Mode Extensions",                     // bit 6
  "Enhanced Intel SpeedStep technology",       // bit 7
  "Thermal Monitor 2",                         // bit 8
  "Supplemental Streaming SIMD Extensions 3",  // bit 9
  "L1 Context ID",                             // bit 10
  "",                                          // bit 11
  "Fused Multiply-Add",                        // bit 12
  "CMPXCHG16B",                                // bit 13
  "xTPR Update Control",                       // bit 14
  "Perfmon and Debug Capability",              // bit 15
  "",                                          // bit 16
  "Process-context identifiers",               // bit 17
  "Direct Cache Access",                       // bit 18
  "Streaming SIMD extensions 4.1",             // bit 19
  "Streaming SIMD extensions 4.2",             // bit 20
  "x2APIC",                                    // bit 21
  "MOVBE",                                     // bit 22
  "Popcount instruction",                      // bit 23
  "TSC-Deadline",                              // bit 24
  "AESNI",                                     // bit 25
  "XSAVE",                                     // bit 26
  "OSXSAVE",                                   // bit 27
  "AVX",                                       // bit 28
  "F16C",                                      // bit 29
  "RDRAND",                                    // bit 30
  ""                                           // bit 31
};
2403 
// Descriptions for 32 feature-bit positions, one entry per bit; only
// positions 0, 1, 5, 6 and 7 carry a description, all others hold "".
// NOTE(review): presumably these describe the extended CPUID ECX feature
// bits -- confirm against the code that prints them.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",            // bit 0
  "Core multi-processor legacy mode",         // bit 1
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",        // bit 5
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",  // bit 6
  "Misaligned SSE mode",                      // bit 7
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2438 
2439 const char* VM_Version::cpu_model_description(void) {
2440   uint32_t cpu_family = extended_cpu_family();
2441   uint32_t cpu_model = extended_cpu_model();
2442   const char* model = nullptr;
2443 
2444   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2445     for (uint32_t i = 0; i <= cpu_model; i++) {
2446       model = _model_id_pentium_pro[i];
2447       if (model == nullptr) {
2448         break;
2449       }
2450     }
2451   }
2452   return model;
2453 }
2454 
2455 const char* VM_Version::cpu_brand_string(void) {
2456   if (_cpu_brand_string == nullptr) {
2457     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2458     if (nullptr == _cpu_brand_string) {
2459       return nullptr;
2460     }
2461     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2462     if (ret_val != OS_OK) {
2463       FREE_C_HEAP_ARRAY(_cpu_brand_string);
2464       _cpu_brand_string = nullptr;
2465     }
2466   }
2467   return _cpu_brand_string;
2468 }
2469 
2470 const char* VM_Version::cpu_brand(void) {
2471   const char*  brand  = nullptr;
2472 
2473   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2474     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2475     brand = _brand_id[0];
2476     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2477       brand = _brand_id[i];
2478     }
2479   }
2480   return brand;
2481 }
2482 
2483 bool VM_Version::cpu_is_em64t(void) {
2484   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2485 }
2486 
2487 bool VM_Version::is_netburst(void) {
2488   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2489 }
2490 
2491 bool VM_Version::supports_tscinv_ext(void) {
2492   if (!supports_tscinv_bit()) {
2493     return false;
2494   }
2495 
2496   if (is_intel()) {
2497     return true;
2498   }
2499 
2500   if (is_amd()) {
2501     return !is_amd_Barcelona();
2502   }
2503 
2504   if (is_hygon()) {
2505     return true;
2506   }
2507 
2508   return false;
2509 }
2510 
2511 void VM_Version::resolve_cpu_information_details(void) {
2512 
2513   // in future we want to base this information on proper cpu
2514   // and cache topology enumeration such as:
2515   // Intel 64 Architecture Processor Topology Enumeration
2516   // which supports system cpu and cache topology enumeration
2517   // either using 2xAPICIDs or initial APICIDs
2518 
2519   // currently only rough cpu information estimates
2520   // which will not necessarily reflect the exact configuration of the system
2521 
2522   // this is the number of logical hardware threads
2523   // visible to the operating system
2524   _no_of_threads = os::processor_count();
2525 
2526   // find out number of threads per cpu package
2527   int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2528   if (threads_per_package == 0) {
2529     // Fallback code to avoid div by zero in subsequent code.
2530     // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2531     threads_per_package = threads_per_core() * cores_per_cpu();
2532   }
2533 
2534   // use amount of threads visible to the process in order to guess number of sockets
2535   _no_of_sockets = _no_of_threads / threads_per_package;
2536 
2537   // process might only see a subset of the total number of threads
2538   // from a single processor package. Virtualization/resource management for example.
2539   // If so then just write a hard 1 as num of pkgs.
2540   if (0 == _no_of_sockets) {
2541     _no_of_sockets = 1;
2542   }
2543 
2544   // estimate the number of cores
2545   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2546 }
2547 
2548 
2549 const char* VM_Version::cpu_family_description(void) {
2550   int cpu_family_id = extended_cpu_family();
2551   if (is_amd()) {
2552     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2553       return _family_id_amd[cpu_family_id];
2554     }
2555   }
2556   if (is_intel()) {
2557     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2558       return cpu_model_description();
2559     }
2560     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2561       return _family_id_intel[cpu_family_id];
2562     }
2563   }
2564   if (is_zx()) {
2565     int cpu_model_id = extended_cpu_model();
2566     if (cpu_family_id == 7) {
2567       switch (cpu_model_id) {
2568         case 0x1B:
2569           return "wudaokou";
2570         case 0x3B:
2571           return "lujiazui";
2572         case 0x5B:
2573           return "yongfeng";
2574         case 0x6B:
2575           return "shijidadao";
2576       }
2577     } else if (cpu_family_id == 6) {
2578       return "zhangjiang";
2579     }
2580   }
2581   if (is_hygon()) {
2582     return "Dhyana";
2583   }
2584   return "Unknown x86";
2585 }
2586 
2587 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2588   assert(buf != nullptr, "buffer is null!");
2589   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2590 
2591   const char* cpu_type = nullptr;
2592   const char* x64 = nullptr;
2593 
2594   if (is_intel()) {
2595     cpu_type = "Intel";
2596     x64 = cpu_is_em64t() ? " Intel64" : "";
2597   } else if (is_amd()) {
2598     cpu_type = "AMD";
2599     x64 = cpu_is_em64t() ? " AMD64" : "";
2600   } else if (is_zx()) {
2601     cpu_type = "Zhaoxin";
2602     x64 = cpu_is_em64t() ? " x86_64" : "";
2603   } else if (is_hygon()) {
2604     cpu_type = "Hygon";
2605     x64 = cpu_is_em64t() ? " AMD64" : "";
2606   } else {
2607     cpu_type = "Unknown x86";
2608     x64 = cpu_is_em64t() ? " x86_64" : "";
2609   }
2610 
2611   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2612     cpu_type,
2613     cpu_family_description(),
2614     supports_ht() ? " (HT)" : "",
2615     supports_sse3() ? " SSE3" : "",
2616     supports_ssse3() ? " SSSE3" : "",
2617     supports_sse4_1() ? " SSE4.1" : "",
2618     supports_sse4_2() ? " SSE4.2" : "",
2619     supports_sse4a() ? " SSE4A" : "",
2620     is_netburst() ? " Netburst" : "",
2621     is_intel_family_core() ? " Core" : "",
2622     x64);
2623 
2624   return OS_OK;
2625 }
2626 
2627 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2628   assert(buf != nullptr, "buffer is null!");
2629   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2630   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2631 
2632   // invoke newly generated asm code to fetch CPU Brand String
2633   getCPUIDBrandString_stub(&_cpuid_info);
2634 
2635   // fetch results into buffer
2636   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2637   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2638   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2639   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2640   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2641   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2642   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2643   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2644   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2645   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2646   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2647   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2648 
2649   return OS_OK;
2650 }
2651 
2652 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2653   guarantee(buf != nullptr, "buffer is null!");
2654   guarantee(buf_len > 0, "buffer len not enough!");
2655 
2656   unsigned int flag = 0;
2657   unsigned int fi = 0;
2658   size_t       written = 0;
2659   const char*  prefix = "";
2660 
2661 #define WRITE_TO_BUF(string)                                                          \
2662   {                                                                                   \
2663     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2664     if (res < 0) {                                                                    \
2665       return buf_len - 1;                                                             \
2666     }                                                                                 \
2667     written += res;                                                                   \
2668     if (prefix[0] == '\0') {                                                          \
2669       prefix = ", ";                                                                  \
2670     }                                                                                 \
2671   }
2672 
2673   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2674     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2675       continue; /* no hyperthreading */
2676     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2677       continue; /* no fast system call */
2678     }
2679     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2680       WRITE_TO_BUF(_feature_edx_id[fi]);
2681     }
2682   }
2683 
2684   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2685     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2686       WRITE_TO_BUF(_feature_ecx_id[fi]);
2687     }
2688   }
2689 
2690   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2691     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2692       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2693     }
2694   }
2695 
2696   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2697     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2698       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2699     }
2700   }
2701 
2702   if (supports_tscinv_bit()) {
2703       WRITE_TO_BUF("Invariant TSC");
2704   }
2705 
2706   if (supports_hybrid()) {
2707       WRITE_TO_BUF("Hybrid Architecture");
2708   }
2709 
2710   return written;
2711 }
2712 
2713 /**
2714  * Write a detailed description of the cpu to a given buffer, including
2715  * feature set.
2716  */
2717 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2718   assert(buf != nullptr, "buffer is null!");
2719   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2720 
2721   static const char* unknown = "<unknown>";
2722   char               vendor_id[VENDOR_LENGTH];
2723   const char*        family = nullptr;
2724   const char*        model = nullptr;
2725   const char*        brand = nullptr;
2726   int                outputLen = 0;
2727 
2728   family = cpu_family_description();
2729   if (family == nullptr) {
2730     family = unknown;
2731   }
2732 
2733   model = cpu_model_description();
2734   if (model == nullptr) {
2735     model = unknown;
2736   }
2737 
2738   brand = cpu_brand_string();
2739 
2740   if (brand == nullptr) {
2741     brand = cpu_brand();
2742     if (brand == nullptr) {
2743       brand = unknown;
2744     }
2745   }
2746 
2747   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2748   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2749   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2750   vendor_id[VENDOR_LENGTH-1] = '\0';
2751 
2752   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2753     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2754     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2755     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2756     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2757     "Supports: ",
2758     brand,
2759     vendor_id,
2760     family,
2761     extended_cpu_family(),
2762     model,
2763     extended_cpu_model(),
2764     cpu_stepping(),
2765     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2766     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2767     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2768     _cpuid_info.std_cpuid1_eax.value,
2769     _cpuid_info.std_cpuid1_ebx.value,
2770     _cpuid_info.std_cpuid1_ecx.value,
2771     _cpuid_info.std_cpuid1_edx.value,
2772     _cpuid_info.ext_cpuid1_eax,
2773     _cpuid_info.ext_cpuid1_ebx,
2774     _cpuid_info.ext_cpuid1_ecx,
2775     _cpuid_info.ext_cpuid1_edx);
2776 
2777   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2778     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2779     return OS_ERR;
2780   }
2781 
2782   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2783 
2784   return OS_OK;
2785 }
2786 
2787 
2788 // Fill in Abstract_VM_Version statics
2789 void VM_Version::initialize_cpu_information() {
2790   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2791   assert(!_initialized, "shouldn't be initialized yet");
2792   resolve_cpu_information_details();
2793 
2794   // initialize cpu_name and cpu_desc
2795   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2796   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2797   _initialized = true;
2798 }
2799 
2800 /**
2801  *  For information about extracting the frequency from the cpu brand string, please see:
2802  *
2803  *    Intel Processor Identification and the CPUID Instruction
2804  *    Application Note 485
2805  *    May 2012
2806  *
2807  * The return value is the frequency in Hz.
2808  */
2809 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2810   const char* const brand_string = cpu_brand_string();
2811   if (brand_string == nullptr) {
2812     return 0;
2813   }
2814   const int64_t MEGA = 1000000;
2815   int64_t multiplier = 0;
2816   int64_t frequency = 0;
2817   uint8_t idx = 0;
2818   // The brand string buffer is at most 48 bytes.
2819   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2820   for (; idx < 48-2; ++idx) {
2821     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2822     // Search brand string for "yHz" where y is M, G, or T.
2823     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2824       if (brand_string[idx] == 'M') {
2825         multiplier = MEGA;
2826       } else if (brand_string[idx] == 'G') {
2827         multiplier = MEGA * 1000;
2828       } else if (brand_string[idx] == 'T') {
2829         multiplier = MEGA * MEGA;
2830       }
2831       break;
2832     }
2833   }
2834   if (multiplier > 0) {
2835     // Compute frequency (in Hz) from brand string.
2836     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2837       frequency =  (brand_string[idx-4] - '0') * multiplier;
2838       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2839       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2840     } else { // format is "xxxx"
2841       frequency =  (brand_string[idx-4] - '0') * 1000;
2842       frequency += (brand_string[idx-3] - '0') * 100;
2843       frequency += (brand_string[idx-2] - '0') * 10;
2844       frequency += (brand_string[idx-1] - '0');
2845       frequency *= multiplier;
2846     }
2847   }
2848   return frequency;
2849 }
2850 
2851 
2852 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2853   if (_max_qualified_cpu_frequency == 0) {
2854     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2855   }
2856   return _max_qualified_cpu_frequency;
2857 }
2858 
2859 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2860   VM_Features vm_features;
2861 
2862   // check the features that must be present
2863   guarantee(std_cpuid1_edx.bits.sse2 != 0, "sse2 is not supported");
2864   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
2865   // clflush_size is size in quadwords (8 bytes).
2866   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == ICache::line_size/8, "clflush size is not supported");
2867 
2868   // sse and sse2 are guaranteed to be present
2869   vm_features.set_feature(CPU_SSE);
2870   vm_features.set_feature(CPU_SSE2);
2871 
2872   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2873     vm_features.set_feature(CPU_CX8);
2874   if (std_cpuid1_edx.bits.cmov != 0)
2875     vm_features.set_feature(CPU_CMOV);
2876   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2877       ext_cpuid1_edx.bits.fxsr != 0))
2878     vm_features.set_feature(CPU_FXSR);
2879   // HT flag is set for multi-core processors also.
2880   if (threads_per_core() > 1)
2881     vm_features.set_feature(CPU_HT);
2882   if (std_cpuid1_ecx.bits.sse3 != 0)
2883     vm_features.set_feature(CPU_SSE3);
2884   if (std_cpuid1_ecx.bits.ssse3 != 0)
2885     vm_features.set_feature(CPU_SSSE3);
2886   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2887     vm_features.set_feature(CPU_SSE4_1);
2888   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2889     vm_features.set_feature(CPU_SSE4_2);
2890   if (std_cpuid1_ecx.bits.popcnt != 0)
2891     vm_features.set_feature(CPU_POPCNT);
2892   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2893       xem_xcr0_eax.bits.apx_f != 0 &&
2894       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2895     vm_features.set_feature(CPU_APX_F);
2896   }
2897   if (std_cpuid1_ecx.bits.avx != 0 &&
2898       std_cpuid1_ecx.bits.osxsave != 0 &&
2899       xem_xcr0_eax.bits.sse != 0 &&
2900       xem_xcr0_eax.bits.ymm != 0) {
2901     vm_features.set_feature(CPU_AVX);
2902     vm_features.set_feature(CPU_VZEROUPPER);
2903     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2904       vm_features.set_feature(CPU_SHA512);
2905     if (std_cpuid1_ecx.bits.f16c != 0)
2906       vm_features.set_feature(CPU_F16C);
2907     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2908       vm_features.set_feature(CPU_AVX2);
2909       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2910         vm_features.set_feature(CPU_AVX_IFMA);
2911     }
2912     if (sef_cpuid7_ecx.bits.gfni != 0)
2913         vm_features.set_feature(CPU_GFNI);
2914     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2915         xem_xcr0_eax.bits.opmask != 0 &&
2916         xem_xcr0_eax.bits.zmm512 != 0 &&
2917         xem_xcr0_eax.bits.zmm32 != 0) {
2918       vm_features.set_feature(CPU_AVX512F);
2919       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2920         vm_features.set_feature(CPU_AVX512CD);
2921       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2922         vm_features.set_feature(CPU_AVX512DQ);
2923       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2924         vm_features.set_feature(CPU_AVX512_IFMA);
2925       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2926         vm_features.set_feature(CPU_AVX512PF);
2927       if (sef_cpuid7_ebx.bits.avx512er != 0)
2928         vm_features.set_feature(CPU_AVX512ER);
2929       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2930         vm_features.set_feature(CPU_AVX512BW);
2931       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2932         vm_features.set_feature(CPU_AVX512VL);
2933       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2934         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2935       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2936         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2937       if (sef_cpuid7_ecx.bits.vaes != 0)
2938         vm_features.set_feature(CPU_AVX512_VAES);
2939       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2940         vm_features.set_feature(CPU_AVX512_VNNI);
2941       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2942         vm_features.set_feature(CPU_AVX512_BITALG);
2943       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2944         vm_features.set_feature(CPU_AVX512_VBMI);
2945       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2946         vm_features.set_feature(CPU_AVX512_VBMI2);
2947     }
2948     if (is_intel()) {
2949       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2950           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2951           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2952           xem_xcr0_eax.bits.opmask != 0 &&
2953           xem_xcr0_eax.bits.zmm512 != 0 &&
2954           xem_xcr0_eax.bits.zmm32 != 0) {
2955         vm_features.set_feature(CPU_AVX10_1);
2956         vm_features.set_feature(CPU_AVX512F);
2957         vm_features.set_feature(CPU_AVX512CD);
2958         vm_features.set_feature(CPU_AVX512DQ);
2959         vm_features.set_feature(CPU_AVX512PF);
2960         vm_features.set_feature(CPU_AVX512ER);
2961         vm_features.set_feature(CPU_AVX512BW);
2962         vm_features.set_feature(CPU_AVX512VL);
2963         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2964         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2965         vm_features.set_feature(CPU_AVX512_VAES);
2966         vm_features.set_feature(CPU_AVX512_VNNI);
2967         vm_features.set_feature(CPU_AVX512_BITALG);
2968         vm_features.set_feature(CPU_AVX512_VBMI);
2969         vm_features.set_feature(CPU_AVX512_VBMI2);
2970         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2971           vm_features.set_feature(CPU_AVX10_2);
2972         }
2973       }
2974     }
2975   }
2976 
2977   if (std_cpuid1_ecx.bits.hv != 0)
2978     vm_features.set_feature(CPU_HV);
2979   if (sef_cpuid7_ebx.bits.bmi1 != 0)
2980     vm_features.set_feature(CPU_BMI1);
2981   if (std_cpuid1_edx.bits.tsc != 0)
2982     vm_features.set_feature(CPU_TSC);
2983   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
2984     vm_features.set_feature(CPU_TSCINV_BIT);
2985   if (std_cpuid1_ecx.bits.aes != 0)
2986     vm_features.set_feature(CPU_AES);
2987   if (ext_cpuid1_ecx.bits.lzcnt != 0)
2988     vm_features.set_feature(CPU_LZCNT);
2989   if (ext_cpuid1_ecx.bits.prefetchw != 0)
2990     vm_features.set_feature(CPU_3DNOW_PREFETCH);
2991   if (sef_cpuid7_ebx.bits.erms != 0)
2992     vm_features.set_feature(CPU_ERMS);
2993   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2994     vm_features.set_feature(CPU_FSRM);
2995   if (std_cpuid1_ecx.bits.clmul != 0)
2996     vm_features.set_feature(CPU_CLMUL);
2997   if (sef_cpuid7_ebx.bits.rtm != 0)
2998     vm_features.set_feature(CPU_RTM);
2999   if (sef_cpuid7_ebx.bits.adx != 0)
3000      vm_features.set_feature(CPU_ADX);
3001   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3002     vm_features.set_feature(CPU_BMI2);
3003   if (sef_cpuid7_ebx.bits.sha != 0)
3004     vm_features.set_feature(CPU_SHA);
3005   if (std_cpuid1_ecx.bits.fma != 0)
3006     vm_features.set_feature(CPU_FMA);
3007   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3008     vm_features.set_feature(CPU_FLUSHOPT);
3009   if (sef_cpuid7_ebx.bits.clwb != 0)
3010     vm_features.set_feature(CPU_CLWB);
3011   if (ext_cpuid1_edx.bits.rdtscp != 0)
3012     vm_features.set_feature(CPU_RDTSCP);
3013   if (sef_cpuid7_ecx.bits.rdpid != 0)
3014     vm_features.set_feature(CPU_RDPID);
3015 
3016   // AMD|Hygon additional features.
3017   if (is_amd_family()) {
3018     // PREFETCHW was checked above, check TDNOW here.
3019     if ((ext_cpuid1_edx.bits.tdnow != 0))
3020       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3021     if (ext_cpuid1_ecx.bits.sse4a != 0)
3022       vm_features.set_feature(CPU_SSE4A);
3023   }
3024 
3025   // Intel additional features.
3026   if (is_intel()) {
3027     if (sef_cpuid7_edx.bits.serialize != 0)
3028       vm_features.set_feature(CPU_SERIALIZE);
3029     if (sef_cpuid7_edx.bits.hybrid != 0)
3030       vm_features.set_feature(CPU_HYBRID);
3031     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3032       vm_features.set_feature(CPU_AVX512_FP16);
3033   }
3034 
3035   // ZX additional features.
3036   if (is_zx()) {
3037     // We do not know if these are supported by ZX, so we cannot trust
3038     // common CPUID bit for them.
3039     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3040     vm_features.clear_feature(CPU_CLWB);
3041   }
3042 
3043   // Protection key features.
3044   if (sef_cpuid7_ecx.bits.pku != 0) {
3045     vm_features.set_feature(CPU_PKU);
3046   }
3047   if (sef_cpuid7_ecx.bits.ospke != 0) {
3048     vm_features.set_feature(CPU_OSPKE);
3049   }
3050 
3051   // Control flow enforcement (CET) features.
3052   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3053     vm_features.set_feature(CPU_CET_SS);
3054   }
3055   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3056     vm_features.set_feature(CPU_CET_IBT);
3057   }
3058 
3059   // Composite features.
3060   if (supports_tscinv_bit() &&
3061       ((is_amd_family() && !is_amd_Barcelona()) ||
3062        is_intel_tsc_synched_at_init())) {
3063     vm_features.set_feature(CPU_TSCINV);
3064   }
3065   return vm_features;
3066 }
3067 
3068 bool VM_Version::os_supports_avx_vectors() {
3069   bool retVal = false;
3070   int nreg = 4;
3071   if (supports_evex()) {
3072     // Verify that OS save/restore all bits of EVEX registers
3073     // during signal processing.
3074     retVal = true;
3075     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3076       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3077         retVal = false;
3078         break;
3079       }
3080     }
3081   } else if (supports_avx()) {
3082     // Verify that OS save/restore all bits of AVX registers
3083     // during signal processing.
3084     retVal = true;
3085     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3086       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3087         retVal = false;
3088         break;
3089       }
3090     }
3091     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3092     if (retVal == false) {
3093       // Verify that OS save/restore all bits of EVEX registers
3094       // during signal processing.
3095       retVal = true;
3096       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3097         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3098           retVal = false;
3099           break;
3100         }
3101       }
3102     }
3103   }
3104   return retVal;
3105 }
3106 
3107 bool VM_Version::os_supports_apx_egprs() {
3108   if (!supports_apx_f()) {
3109     return false;
3110   }
3111   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3112       _cpuid_info.apx_save[1] != egpr_test_value()) {
3113     return false;
3114   }
3115   return true;
3116 }
3117 
3118 uint VM_Version::cores_per_cpu() {
3119   uint result = 1;
3120   if (is_intel()) {
3121     bool supports_topology = supports_processor_topology();
3122     if (supports_topology) {
3123       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3124                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3125     }
3126     if (!supports_topology || result == 0) {
3127       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3128     }
3129   } else if (is_amd_family()) {
3130     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3131     if (cpu_family() >= 0x17) { // Zen or later
3132       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3133     }
3134   } else if (is_zx()) {
3135     bool supports_topology = supports_processor_topology();
3136     if (supports_topology) {
3137       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3138                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3139     }
3140     if (!supports_topology || result == 0) {
3141       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3142     }
3143   }
3144   return result;
3145 }
3146 
3147 uint VM_Version::threads_per_core() {
3148   uint result = 1;
3149   if (is_intel() && supports_processor_topology()) {
3150     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3151   } else if (is_zx() && supports_processor_topology()) {
3152     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3153   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3154     if (cpu_family() >= 0x17) {
3155       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3156     } else {
3157       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3158                  cores_per_cpu();
3159     }
3160   }
3161   return (result == 0 ? 1 : result);
3162 }
3163 
3164 uint VM_Version::L1_line_size() {
3165   uint result = 0;
3166   if (is_intel()) {
3167     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3168   } else if (is_amd_family()) {
3169     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3170   } else if (is_zx()) {
3171     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3172   }
3173   if (result < 32) // not defined ?
3174     result = 32;   // 32 bytes by default on x86 and other x64
3175   return result;
3176 }
3177 
3178 bool VM_Version::is_intel_tsc_synched_at_init() {
3179   if (is_intel_family_core()) {
3180     uint32_t ext_model = extended_cpu_model();
3181     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3182         ext_model == CPU_MODEL_WESTMERE_EP    ||
3183         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3184         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3185       // <= 2-socket invariant tsc support. EX versions are usually used
3186       // in > 2-socket systems and likely don't synchronize tscs at
3187       // initialization.
3188       // Code that uses tsc values must be prepared for them to arbitrarily
3189       // jump forward or backward.
3190       return true;
3191     }
3192   }
3193   return false;
3194 }
3195 
3196 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3197   // Hardware prefetching (distance/size in bytes):
3198   // Pentium 3 -  64 /  32
3199   // Pentium 4 - 256 / 128
3200   // Athlon    -  64 /  32 ????
3201   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3202   // Core      - 128 /  64
3203   //
3204   // Software prefetching (distance in bytes / instruction with best score):
3205   // Pentium 3 - 128 / prefetchnta
3206   // Pentium 4 - 512 / prefetchnta
3207   // Athlon    - 128 / prefetchnta
3208   // Opteron   - 256 / prefetchnta
3209   // Core      - 256 / prefetchnta
3210   // It will be used only when AllocatePrefetchStyle > 0
3211 
3212   if (is_amd_family()) { // AMD | Hygon
3213     return 256; // Opteron
3214   } else if (is_zx()) {
3215     return 256;
3216   } else { // Intel
3217     if (supports_sse3() && is_intel_server_family()) {
3218       if (is_intel_modern_cpu()) { // Nehalem based cpus
3219         return 192;
3220       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3221         return 384;
3222       }
3223     }
3224     if (is_intel_server_family()) {
3225       return 256; // Pentium M, Core, Core2
3226     } else {
3227       return 512; // Pentium 4
3228     }
3229   }
3230 }
3231 
3232 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3233   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3234   switch (id) {
3235   case vmIntrinsics::_floatToFloat16:
3236   case vmIntrinsics::_float16ToFloat:
3237     if (!supports_float16()) {
3238       return false;
3239     }
3240     break;
3241   default:
3242     break;
3243   }
3244   return true;
3245 }
3246 
3247 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3248   int i = 0;
3249   ss.join([&]() {
3250     const char* str = nullptr;
3251     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3252       if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3253         str = _features_names[i];
3254       }
3255       i += 1;
3256     }
3257     return str;
3258   }, ", ");
3259 }
3260 
3261 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3262   VM_Features* features = (VM_Features*)features_buffer;
3263   insert_features_names(*features, ss);
3264 }
3265 
3266 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3267   VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3268   VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3269   int i = 0;
3270   ss.join([&]() {
3271     const char* str = nullptr;
3272     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3273       Feature_Flag flag = (Feature_Flag)i;
3274       if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3275         str = _features_names[i];
3276       }
3277       i += 1;
3278     }
3279     return str;
3280   }, ", ");
3281 }
3282 
3283 int VM_Version::cpu_features_size() {
3284   return sizeof(VM_Features);
3285 }
3286 
3287 void VM_Version::store_cpu_features(void* buf) {
3288   VM_Features copy = _features.aot_code_cache_features();
3289   memcpy(buf, &copy, sizeof(VM_Features));
3290 }
3291 
3292 bool VM_Version::verify_aot_code_cache_features(void* features_buffer) {
3293   VM_Features* features_to_test = (VM_Features*)features_buffer;
3294   VM_Features rt_features = _features.aot_code_cache_features();
3295   return rt_features.verify_aot_code_cache_features(features_to_test);
3296 }