1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
// Raw CPU identification, filled in by get_processor_features() from CPUID data.
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
// Snapshot of all raw CPUID leaves; populated by the generated get_cpu_info stub.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into a parallel table of human-readable feature names.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Blob holding the generated CPU-detection stubs; sized generously for all stubs below.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// Size of the feature bitmap in 64-bit words.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// Entry-point types for the generated stubs (called as plain C functions).
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
  82 
  83 bool VM_Version::supports_clflush() {
  84   // clflush should always be available on x86_64
  85   // if not we are in real trouble because we rely on it
  86   // to flush the code cache.
  87   // Unfortunately, Assembler::clflush is currently called as part
  88   // of generation of the code cache flush routine. This happens
  89   // under Universe::init before the processor features are set
  90   // up. Assembler::flush calls this routine to check that clflush
  91   // is allowed. So, we give the caller a free pass if Universe init
  92   // is still in progress.
  93   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  94   return true;
  95 }
  96 
// CPUID leaf (function) numbers used by the stub generators below.
// Standard leaves start at 0x0; extended leaves start at 0x80000000.
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
 109 
// Generates the small assembly stubs used very early at startup to query
// CPUID, detect virtualization, and probe whether the OS correctly
// save/restores extended register state (YMM/ZMM, APX EGPRs) across signals.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emits a stub that zeroes the APX extended GPRs r16/r31 used by the
  // signal-based APX probe, so a later match with egpr_test_value() can
  // only come from the OS restoring them.
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    bool save_apx = UseAPX;
    // Temporarily enable APX features so the assembler accepts r16/r31.
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Emits the stub behind get_cpu_info_stub:
  //   void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
  // Fills *cpuid_info with raw CPUID leaves, reads XCR0, and uses
  // intentional SEGVs to verify the OS restores AVX/EVEX (and APX) register
  // state after signal handling.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);         // pop does not change flags, so the jccb below still
                         // tests the result of the andl above
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    // Note: the chain above falls through the ext_cpuid1/5/7/8 blocks below
    // in descending leaf order, so each leaf is only read when supported.
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX probe: load a known test value into r16/r31, trigger a SEGV, and
    // save their values after the signal so the caller can check the OS
    // restored them.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore xmm31/xmm8/xmm7 in reverse order of the saves above.
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore xmm15/xmm8/xmm7 in reverse order of the saves above.
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  // Emits a vzeroupper unless the CPU is a Knights-family Xeon Phi
  // (which lacks it); jumps to L_wrapup on the excluded models.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  // Emits the stub behind detect_virt_stub: executes CPUID for the given
  // leaf (first argument) and stores eax/ebx/ecx/edx into the uint32_t[4]
  // array passed as the second argument.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Emits the stub behind getCPUIDBrandString_stub: reads the 48-byte
  // processor brand string via extended CPUID leaves 0x80000002..4 into
  // the proc_name_* slots of the CpuidInfo passed as the first argument.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 860 
 861 void VM_Version::get_processor_features() {
 862 
 863   _cpu = 4; // 486 by default
 864   _model = 0;
 865   _stepping = 0;
 866   _logical_processors_per_package = 1;
 867   // i486 internal cache is both I&D and has a 16-byte line size
 868   _L1_data_cache_line_size = 16;
 869 
 870   // Get raw processor info
 871 
 872   get_cpu_info_stub(&_cpuid_info);
 873 
 874   assert_is_initialized();
 875   _cpu = extended_cpu_family();
 876   _model = extended_cpu_model();
 877   _stepping = cpu_stepping();
 878 
 879   if (cpu_family() > 4) { // it supports CPUID
 880     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 881     _cpu_features = _features; // Preserve features
 882     // Logical processors are only available on P4s and above,
 883     // and only if hyperthreading is available.
 884     _logical_processors_per_package = logical_processor_count();
 885     _L1_data_cache_line_size = L1_line_size();
 886   }
 887 
 888   // xchg and xadd instructions
 889   _supports_atomic_getset4 = true;
 890   _supports_atomic_getadd4 = true;
 891   _supports_atomic_getset8 = true;
 892   _supports_atomic_getadd8 = true;
 893 
 894   // OS should support SSE for x64 and hardware should support at least SSE2.
 895   if (!VM_Version::supports_sse2()) {
 896     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 897   }
 898   // in 64 bit the use of SSE2 is the minimum
 899   if (UseSSE < 2) UseSSE = 2;
 900 
 901   // flush_icache_stub have to be generated first.
 902   // That is why Icache line size is hard coded in ICache class,
 903   // see icache_x86.hpp. It is also the reason why we can't use
 904   // clflush instruction in 32-bit VM since it could be running
 905   // on CPU which does not support it.
 906   //
 907   // The only thing we can do is to verify that flushed
 908   // ICache::line_size has correct value.
 909   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 910   // clflush_size is size in quadwords (8 bytes).
 911   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 912 
 913   // assigning this field effectively enables Unsafe.writebackMemory()
 914   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 915   // that is only implemented on x86_64 and only if the OS plays ball
 916   if (os::supports_map_sync()) {
 917     // publish data cache line flush size to generic field, otherwise
 918     // let if default to zero thereby disabling writeback
 919     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 920   }
 921 
 922   // Check if processor has Intel Ecore
 923   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 924     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 925       _model == 0xCC || _model == 0xDD)) {
 926     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 927   }
 928 
 929   if (UseSSE < 4) {
 930     _features.clear_feature(CPU_SSE4_1);
 931     _features.clear_feature(CPU_SSE4_2);
 932   }
 933 
 934   if (UseSSE < 3) {
 935     _features.clear_feature(CPU_SSE3);
 936     _features.clear_feature(CPU_SSSE3);
 937     _features.clear_feature(CPU_SSE4A);
 938   }
 939 
 940   if (UseSSE < 2)
 941     _features.clear_feature(CPU_SSE2);
 942 
 943   if (UseSSE < 1)
 944     _features.clear_feature(CPU_SSE);
 945 
 946   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 947   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 948     UseAVX = 0;
 949   }
 950 
 951   // UseSSE is set to the smaller of what hardware supports and what
 952   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 953   // older Pentiums which do not support it.
 954   int use_sse_limit = 0;
 955   if (UseSSE > 0) {
 956     if (UseSSE > 3 && supports_sse4_1()) {
 957       use_sse_limit = 4;
 958     } else if (UseSSE > 2 && supports_sse3()) {
 959       use_sse_limit = 3;
 960     } else if (UseSSE > 1 && supports_sse2()) {
 961       use_sse_limit = 2;
 962     } else if (UseSSE > 0 && supports_sse()) {
 963       use_sse_limit = 1;
 964     } else {
 965       use_sse_limit = 0;
 966     }
 967   }
 968   if (FLAG_IS_DEFAULT(UseSSE)) {
 969     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 970   } else if (UseSSE > use_sse_limit) {
 971     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 972     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 973   }
 974 
 975   // first try initial setting and detect what we can support
 976   int use_avx_limit = 0;
 977   if (UseAVX > 0) {
 978     if (UseSSE < 4) {
 979       // Don't use AVX if SSE is unavailable or has been disabled.
 980       use_avx_limit = 0;
 981     } else if (UseAVX > 2 && supports_evex()) {
 982       use_avx_limit = 3;
 983     } else if (UseAVX > 1 && supports_avx2()) {
 984       use_avx_limit = 2;
 985     } else if (UseAVX > 0 && supports_avx()) {
 986       use_avx_limit = 1;
 987     } else {
 988       use_avx_limit = 0;
 989     }
 990   }
 991   if (FLAG_IS_DEFAULT(UseAVX)) {
 992     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 993     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 994       FLAG_SET_DEFAULT(UseAVX, 2);
 995     } else {
 996       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 997     }
 998   }
 999 
1000   if (UseAVX > use_avx_limit) {
1001     if (UseSSE < 4) {
1002       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1003     } else {
1004       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1005     }
1006     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1007   }
1008 
1009   if (UseAVX < 3) {
1010     _features.clear_feature(CPU_AVX512F);
1011     _features.clear_feature(CPU_AVX512DQ);
1012     _features.clear_feature(CPU_AVX512CD);
1013     _features.clear_feature(CPU_AVX512BW);
1014     _features.clear_feature(CPU_AVX512ER);
1015     _features.clear_feature(CPU_AVX512PF);
1016     _features.clear_feature(CPU_AVX512VL);
1017     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1018     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1019     _features.clear_feature(CPU_AVX512_VAES);
1020     _features.clear_feature(CPU_AVX512_VNNI);
1021     _features.clear_feature(CPU_AVX512_VBMI);
1022     _features.clear_feature(CPU_AVX512_VBMI2);
1023     _features.clear_feature(CPU_AVX512_BITALG);
1024     _features.clear_feature(CPU_AVX512_IFMA);
1025     _features.clear_feature(CPU_APX_F);
1026     _features.clear_feature(CPU_AVX512_FP16);
1027     _features.clear_feature(CPU_AVX10_1);
1028     _features.clear_feature(CPU_AVX10_2);
1029   }
1030 
1031 
1032   if (UseAVX < 2) {
1033     _features.clear_feature(CPU_AVX2);
1034     _features.clear_feature(CPU_AVX_IFMA);
1035   }
1036 
1037   if (UseAVX < 1) {
1038     _features.clear_feature(CPU_AVX);
1039     _features.clear_feature(CPU_VZEROUPPER);
1040     _features.clear_feature(CPU_F16C);
1041     _features.clear_feature(CPU_SHA512);
1042   }
1043 
1044   if (logical_processors_per_package() == 1) {
1045     // HT processor could be installed on a system which doesn't support HT.
1046     _features.clear_feature(CPU_HT);
1047   }
1048 
1049   if (is_intel()) { // Intel cpus specific settings
1050     if (is_knights_family()) {
1051       _features.clear_feature(CPU_VZEROUPPER);
1052       _features.clear_feature(CPU_AVX512BW);
1053       _features.clear_feature(CPU_AVX512VL);
1054       _features.clear_feature(CPU_APX_F);
1055       _features.clear_feature(CPU_AVX512DQ);
1056       _features.clear_feature(CPU_AVX512_VNNI);
1057       _features.clear_feature(CPU_AVX512_VAES);
1058       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1059       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1060       _features.clear_feature(CPU_AVX512_VBMI);
1061       _features.clear_feature(CPU_AVX512_VBMI2);
1062       _features.clear_feature(CPU_CLWB);
1063       _features.clear_feature(CPU_FLUSHOPT);
1064       _features.clear_feature(CPU_GFNI);
1065       _features.clear_feature(CPU_AVX512_BITALG);
1066       _features.clear_feature(CPU_AVX512_IFMA);
1067       _features.clear_feature(CPU_AVX_IFMA);
1068       _features.clear_feature(CPU_AVX512_FP16);
1069       _features.clear_feature(CPU_AVX10_1);
1070       _features.clear_feature(CPU_AVX10_2);
1071     }
1072   }
1073 
1074     // Currently APX support is only enabled for targets supporting AVX512VL feature.
1075   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1076   if (UseAPX && !apx_supported) {
1077     warning("UseAPX is not supported on this CPU, setting it to false");
1078     FLAG_SET_DEFAULT(UseAPX, false);
1079   }
1080 
1081   if (!UseAPX) {
1082     _features.clear_feature(CPU_APX_F);
1083   }
1084 
1085   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1086     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1087     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1088   } else {
1089     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1090   }
1091 
1092   assert(supports_clflush(), "Always present");
1093   if (X86ICacheSync == -1) {
1094     // Auto-detect, choosing the best performant one that still flushes
1095     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1096     if (supports_clwb()) {
1097       FLAG_SET_ERGO(X86ICacheSync, 3);
1098     } else if (supports_clflushopt()) {
1099       FLAG_SET_ERGO(X86ICacheSync, 2);
1100     } else {
1101       FLAG_SET_ERGO(X86ICacheSync, 1);
1102     }
1103   } else {
1104     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1105       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1106     }
1107     if ((X86ICacheSync == 3) && !supports_clwb()) {
1108       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1109     }
1110     if ((X86ICacheSync == 5) && !supports_serialize()) {
1111       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1112     }
1113   }
1114 
1115   stringStream ss(2048);
1116   if (supports_hybrid()) {
1117     ss.print("(hybrid)");
1118   } else {
1119     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1120   }
1121   ss.print(" family %d model %d stepping %d microcode 0x%x",
1122            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1123   ss.print(", ");
1124   int features_offset = (int)ss.size();
1125   insert_features_names(_features, ss);
1126 
1127   _cpu_info_string = ss.as_string(true);
1128   _features_string = _cpu_info_string + features_offset;
1129 
1130   // Use AES instructions if available.
1131   if (supports_aes()) {
1132     if (FLAG_IS_DEFAULT(UseAES)) {
1133       FLAG_SET_DEFAULT(UseAES, true);
1134     }
1135     if (!UseAES) {
1136       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1137         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1138       }
1139       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1140     } else {
1141       if (UseSSE > 2) {
1142         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1143           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1144         }
1145       } else {
1146         // The AES intrinsic stubs require AES instruction support (of course)
1147         // but also require sse3 mode or higher for instructions it use.
1148         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1149           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1150         }
1151         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1152       }
1153 
1154       // --AES-CTR begins--
1155       if (!UseAESIntrinsics) {
1156         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1157           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1158           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1159         }
1160       } else {
1161         if (supports_sse4_1()) {
1162           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1163             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1164           }
1165         } else {
1166            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1167            // but also require sse4.1 mode or higher for instructions it use.
1168           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1169              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1170            }
1171            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1172         }
1173       }
1174       // --AES-CTR ends--
1175     }
1176   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1177     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1178       warning("AES instructions are not available on this CPU");
1179       FLAG_SET_DEFAULT(UseAES, false);
1180     }
1181     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1182       warning("AES intrinsics are not available on this CPU");
1183       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1184     }
1185     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1186       warning("AES-CTR intrinsics are not available on this CPU");
1187       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1188     }
1189   }
1190 
1191   // Use CLMUL instructions if available.
1192   if (supports_clmul()) {
1193     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1194       UseCLMUL = true;
1195     }
1196   } else if (UseCLMUL) {
1197     if (!FLAG_IS_DEFAULT(UseCLMUL))
1198       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1199     FLAG_SET_DEFAULT(UseCLMUL, false);
1200   }
1201 
1202   if (UseCLMUL && (UseSSE > 2)) {
1203     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1204       UseCRC32Intrinsics = true;
1205     }
1206   } else if (UseCRC32Intrinsics) {
1207     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1208       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1209     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1210   }
1211 
1212   if (supports_avx2()) {
1213     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1214       UseAdler32Intrinsics = true;
1215     }
1216   } else if (UseAdler32Intrinsics) {
1217     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1218       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1219     }
1220     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1221   }
1222 
1223   if (supports_sse4_2() && supports_clmul()) {
1224     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1225       UseCRC32CIntrinsics = true;
1226     }
1227   } else if (UseCRC32CIntrinsics) {
1228     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1229       warning("CRC32C intrinsics are not available on this CPU");
1230     }
1231     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1232   }
1233 
1234   // GHASH/GCM intrinsics
1235   if (UseCLMUL && (UseSSE > 2)) {
1236     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1237       UseGHASHIntrinsics = true;
1238     }
1239   } else if (UseGHASHIntrinsics) {
1240     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1241       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1242     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1243   }
1244 
1245   // ChaCha20 Intrinsics
1246   // As long as the system supports AVX as a baseline we can do a
1247   // SIMD-enabled block function.  StubGenerator makes the determination
1248   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1249   // version.
1250   if (UseAVX >= 1) {
1251       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1252           UseChaCha20Intrinsics = true;
1253       }
1254   } else if (UseChaCha20Intrinsics) {
1255       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1256           warning("ChaCha20 intrinsic requires AVX instructions");
1257       }
1258       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1259   }
1260 
1261   // Kyber Intrinsics
1262   // Currently we only have them for AVX512
1263   if (supports_evex() && supports_avx512bw()) {
1264       if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1265           UseKyberIntrinsics = true;
1266       }
1267   } else
1268   if (UseKyberIntrinsics) {
1269      warning("Intrinsics for ML-KEM are not available on this CPU.");
1270      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1271   }
1272 
1273   // Dilithium Intrinsics
1274   // Currently we only have them for AVX512
1275   if (supports_evex() && supports_avx512bw()) {
1276       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1277           UseDilithiumIntrinsics = true;
1278       }
1279   } else if (UseDilithiumIntrinsics) {
1280       warning("Intrinsics for ML-DSA are not available on this CPU.");
1281       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1282   }
1283 
1284   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1285   if (UseAVX >= 2) {
1286     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1287       UseBASE64Intrinsics = true;
1288     }
1289   } else if (UseBASE64Intrinsics) {
1290      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1291       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1292     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1293   }
1294 
1295   if (supports_fma()) {
1296     if (FLAG_IS_DEFAULT(UseFMA)) {
1297       UseFMA = true;
1298     }
1299   } else if (UseFMA) {
1300     warning("FMA instructions are not available on this CPU");
1301     FLAG_SET_DEFAULT(UseFMA, false);
1302   }
1303 
1304   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1305     UseMD5Intrinsics = true;
1306   }
1307 
1308   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1309     if (FLAG_IS_DEFAULT(UseSHA)) {
1310       UseSHA = true;
1311     }
1312   } else if (UseSHA) {
1313     warning("SHA instructions are not available on this CPU");
1314     FLAG_SET_DEFAULT(UseSHA, false);
1315   }
1316 
1317   if (supports_sha() && supports_sse4_1() && UseSHA) {
1318     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1319       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1320     }
1321   } else if (UseSHA1Intrinsics) {
1322     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1323     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1324   }
1325 
1326   if (supports_sse4_1() && UseSHA) {
1327     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1328       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1329     }
1330   } else if (UseSHA256Intrinsics) {
1331     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1332     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1333   }
1334 
1335   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1336     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1337       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1338     }
1339   } else if (UseSHA512Intrinsics) {
1340     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1341     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1342   }
1343 
1344   if (supports_evex() && supports_avx512bw()) {
1345       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1346           UseSHA3Intrinsics = true;
1347       }
1348   } else if (UseSHA3Intrinsics) {
1349       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1350       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1351   }
1352 
1353   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1354     FLAG_SET_DEFAULT(UseSHA, false);
1355   }
1356 
1357 #if COMPILER2_OR_JVMCI
1358   int max_vector_size = 0;
1359   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1360     // 16 byte vectors (in XMM) are supported with SSE2+
1361     max_vector_size = 16;
1362   } else if (UseAVX == 1 || UseAVX == 2) {
1363     // 32 bytes vectors (in YMM) are only supported with AVX+
1364     max_vector_size = 32;
1365   } else if (UseAVX > 2) {
1366     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1367     max_vector_size = 64;
1368   }
1369 
1370   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1371 
1372   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1373     if (MaxVectorSize < min_vector_size) {
1374       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1375       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1376     }
1377     if (MaxVectorSize > max_vector_size) {
1378       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1379       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1380     }
1381     if (!is_power_of_2(MaxVectorSize)) {
1382       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1383       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1384     }
1385   } else {
1386     // If default, use highest supported configuration
1387     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1388   }
1389 
1390 #if defined(COMPILER2) && defined(ASSERT)
1391   if (MaxVectorSize > 0) {
1392     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1393       tty->print_cr("State of YMM registers after signal handle:");
1394       int nreg = 4;
1395       const char* ymm_name[4] = {"0", "7", "8", "15"};
1396       for (int i = 0; i < nreg; i++) {
1397         tty->print("YMM%s:", ymm_name[i]);
1398         for (int j = 7; j >=0; j--) {
1399           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1400         }
1401         tty->cr();
1402       }
1403     }
1404   }
1405 #endif // COMPILER2 && ASSERT
1406 
1407   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1408     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1409       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1410     }
1411   } else if (UsePoly1305Intrinsics) {
1412     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1413     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1414   }
1415 
1416   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1417     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1418       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1419     }
1420   } else if (UseIntPolyIntrinsics) {
1421     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1422     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1423   }
1424 
1425   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1426     UseMultiplyToLenIntrinsic = true;
1427   }
1428   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1429     UseSquareToLenIntrinsic = true;
1430   }
1431   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1432     UseMulAddIntrinsic = true;
1433   }
1434   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1435     UseMontgomeryMultiplyIntrinsic = true;
1436   }
1437   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1438     UseMontgomerySquareIntrinsic = true;
1439   }
1440 #endif // COMPILER2_OR_JVMCI
1441 
1442   // On new cpus instructions which update whole XMM register should be used
1443   // to prevent partial register stall due to dependencies on high half.
1444   //
1445   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1446   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1447   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1448   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1449 
1450 
1451   if (is_zx()) { // ZX cpus specific settings
1452     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1453       UseStoreImmI16 = false; // don't use it on ZX cpus
1454     }
1455     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1456       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1457         // Use it on all ZX cpus
1458         UseAddressNop = true;
1459       }
1460     }
1461     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1462       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1463     }
1464     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1465       if (supports_sse3()) {
1466         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1467       } else {
1468         UseXmmRegToRegMoveAll = false;
1469       }
1470     }
1471     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1472 #ifdef COMPILER2
1473       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1474         // For new ZX cpus do the next optimization:
1475         // don't align the beginning of a loop if there are enough instructions
1476         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1477         // in current fetch line (OptoLoopAlignment) or the padding
1478         // is big (> MaxLoopPad).
1479         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1480         // generated NOP instructions. 11 is the largest size of one
1481         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1482         MaxLoopPad = 11;
1483       }
1484 #endif // COMPILER2
1485       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1486         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1487       }
1488       if (supports_sse4_2()) { // new ZX cpus
1489         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1490           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1491         }
1492       }
1493     }
1494 
1495     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1496       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1497     }
1498   }
1499 
1500   if (is_amd_family()) { // AMD cpus specific settings
1501     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1502       // Use it on new AMD cpus starting from Opteron.
1503       UseAddressNop = true;
1504     }
1505     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1506       // Use it on new AMD cpus starting from Opteron.
1507       UseNewLongLShift = true;
1508     }
1509     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1510       if (supports_sse4a()) {
1511         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1512       } else {
1513         UseXmmLoadAndClearUpper = false;
1514       }
1515     }
1516     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1517       if (supports_sse4a()) {
1518         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1519       } else {
1520         UseXmmRegToRegMoveAll = false;
1521       }
1522     }
1523     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1524       if (supports_sse4a()) {
1525         UseXmmI2F = true;
1526       } else {
1527         UseXmmI2F = false;
1528       }
1529     }
1530     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1531       if (supports_sse4a()) {
1532         UseXmmI2D = true;
1533       } else {
1534         UseXmmI2D = false;
1535       }
1536     }
1537 
1538     // some defaults for AMD family 15h
1539     if (cpu_family() == 0x15) {
1540       // On family 15h processors default is no sw prefetch
1541       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1542         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1543       }
1544       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1545       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1546         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1547       }
1548       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1549       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1550         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1551       }
1552       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1553         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1554       }
1555     }
1556 
1557 #ifdef COMPILER2
1558     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1559       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1560       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1561     }
1562 #endif // COMPILER2
1563 
1564     // Some defaults for AMD family >= 17h && Hygon family 18h
1565     if (cpu_family() >= 0x17) {
1566       // On family >=17h processors use XMM and UnalignedLoadStores
1567       // for Array Copy
1568       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1569         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1570       }
1571       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1572         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1573       }
1574 #ifdef COMPILER2
1575       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1576         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1577       }
1578 #endif
1579     }
1580   }
1581 
1582   if (is_intel()) { // Intel cpus specific settings
1583     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1584       UseStoreImmI16 = false; // don't use it on Intel cpus
1585     }
1586     if (is_intel_server_family() || cpu_family() == 15) {
1587       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1588         // Use it on all Intel cpus starting from PentiumPro
1589         UseAddressNop = true;
1590       }
1591     }
1592     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1593       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1594     }
1595     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1596       if (supports_sse3()) {
1597         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1598       } else {
1599         UseXmmRegToRegMoveAll = false;
1600       }
1601     }
1602     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1603 #ifdef COMPILER2
1604       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1605         // For new Intel cpus do the next optimization:
1606         // don't align the beginning of a loop if there are enough instructions
1607         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1608         // in current fetch line (OptoLoopAlignment) or the padding
1609         // is big (> MaxLoopPad).
1610         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1611         // generated NOP instructions. 11 is the largest size of one
1612         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1613         MaxLoopPad = 11;
1614       }
1615 #endif // COMPILER2
1616 
1617       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1618         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1619       }
1620       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1621         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1622           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1623         }
1624       }
1625     }
1626     if (is_atom_family() || is_knights_family()) {
1627 #ifdef COMPILER2
1628       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1629         OptoScheduling = true;
1630       }
1631 #endif
1632       if (supports_sse4_2()) { // Silvermont
1633         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1634           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1635         }
1636       }
1637       if (FLAG_IS_DEFAULT(UseIncDec)) {
1638         FLAG_SET_DEFAULT(UseIncDec, false);
1639       }
1640     }
1641     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1642       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1643     }
1644 #ifdef COMPILER2
1645     if (UseAVX > 2) {
1646       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1647           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1648            ArrayOperationPartialInlineSize != 0 &&
1649            ArrayOperationPartialInlineSize != 16 &&
1650            ArrayOperationPartialInlineSize != 32 &&
1651            ArrayOperationPartialInlineSize != 64)) {
1652         int inline_size = 0;
1653         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1654           inline_size = 64;
1655         } else if (MaxVectorSize >= 32) {
1656           inline_size = 32;
1657         } else if (MaxVectorSize >= 16) {
1658           inline_size = 16;
1659         }
1660         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1661           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1662         }
1663         ArrayOperationPartialInlineSize = inline_size;
1664       }
1665 
1666       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1667         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1668         if (ArrayOperationPartialInlineSize) {
1669           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1670         } else {
1671           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1672         }
1673       }
1674     }
1675 #endif
1676   }
1677 
1678 #ifdef COMPILER2
1679   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1680     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1681       OptimizeFill = false;
1682     }
1683   }
1684 #endif
1685   if (supports_sse4_2()) {
1686     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1687       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1688     }
1689   } else {
1690     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1691       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1692     }
1693     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1694   }
1695   if (UseSSE42Intrinsics) {
1696     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1697       UseVectorizedMismatchIntrinsic = true;
1698     }
1699   } else if (UseVectorizedMismatchIntrinsic) {
1700     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1701       warning("vectorizedMismatch intrinsics are not available on this CPU");
1702     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1703   }
1704   if (UseAVX >= 2) {
1705     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1706   } else if (UseVectorizedHashCodeIntrinsic) {
1707     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1708       warning("vectorizedHashCode intrinsics are not available on this CPU");
1709     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1710   }
1711 
1712   // Use count leading zeros count instruction if available.
1713   if (supports_lzcnt()) {
1714     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1715       UseCountLeadingZerosInstruction = true;
1716     }
1717    } else if (UseCountLeadingZerosInstruction) {
1718     warning("lzcnt instruction is not available on this CPU");
1719     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1720   }
1721 
1722   // Use count trailing zeros instruction if available
1723   if (supports_bmi1()) {
1724     // tzcnt does not require VEX prefix
1725     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1726       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1727         // Don't use tzcnt if BMI1 is switched off on command line.
1728         UseCountTrailingZerosInstruction = false;
1729       } else {
1730         UseCountTrailingZerosInstruction = true;
1731       }
1732     }
1733   } else if (UseCountTrailingZerosInstruction) {
1734     warning("tzcnt instruction is not available on this CPU");
1735     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1736   }
1737 
1738   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1739   // VEX prefix is generated only when AVX > 0.
1740   if (supports_bmi1() && supports_avx()) {
1741     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1742       UseBMI1Instructions = true;
1743     }
1744   } else if (UseBMI1Instructions) {
1745     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1746     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1747   }
1748 
1749   if (supports_bmi2() && supports_avx()) {
1750     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1751       UseBMI2Instructions = true;
1752     }
1753   } else if (UseBMI2Instructions) {
1754     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1755     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1756   }
1757 
1758   // Use population count instruction if available.
1759   if (supports_popcnt()) {
1760     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1761       UsePopCountInstruction = true;
1762     }
1763   } else if (UsePopCountInstruction) {
1764     warning("POPCNT instruction is not available on this CPU");
1765     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1766   }
1767 
1768   // Use fast-string operations if available.
1769   if (supports_erms()) {
1770     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1771       UseFastStosb = true;
1772     }
1773   } else if (UseFastStosb) {
1774     warning("fast-string operations are not available on this CPU");
1775     FLAG_SET_DEFAULT(UseFastStosb, false);
1776   }
1777 
1778   // For AMD Processors use XMM/YMM MOVDQU instructions
1779   // for Object Initialization as default
1780   if (is_amd() && cpu_family() >= 0x19) {
1781     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1782       UseFastStosb = false;
1783     }
1784   }
1785 
1786 #ifdef COMPILER2
1787   if (is_intel() && MaxVectorSize > 16) {
1788     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1789       UseFastStosb = false;
1790     }
1791   }
1792 #endif
1793 
1794   // Use XMM/YMM MOVDQU instruction for Object Initialization
1795   if (!UseFastStosb && UseUnalignedLoadStores) {
1796     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1797       UseXMMForObjInit = true;
1798     }
1799   } else if (UseXMMForObjInit) {
1800     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1801     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1802   }
1803 
1804 #ifdef COMPILER2
1805   if (FLAG_IS_DEFAULT(AlignVector)) {
1806     // Modern processors allow misaligned memory operations for vectors.
1807     AlignVector = !UseUnalignedLoadStores;
1808   }
1809 #endif // COMPILER2
1810 
1811   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1812     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1813       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1814     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1815       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1816     }
1817   }
1818 
1819   // Allocation prefetch settings
1820   int cache_line_size = checked_cast<int>(prefetch_data_size());
1821   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1822       (cache_line_size > AllocatePrefetchStepSize)) {
1823     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1824   }
1825 
1826   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1827     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1828     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1829       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1830     }
1831     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1832   }
1833 
1834   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1835     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1836     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1837   }
1838 
1839   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1840     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1841         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1842       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1843     }
1844 #ifdef COMPILER2
1845     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1846       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1847     }
1848 #endif
1849   }
1850 
1851   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1852 #ifdef COMPILER2
1853     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1854       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1855     }
1856 #endif
1857   }
1858 
1859   // Prefetch settings
1860 
1861   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1862   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1863   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1864   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1865 
1866   // gc copy/scan is disabled if prefetchw isn't supported, because
1867   // Prefetch::write emits an inlined prefetchw on Linux.
1868   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1869   // The used prefetcht0 instruction works for both amd64 and em64t.
1870 
1871   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1872     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1873   }
1874   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1875     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1876   }
1877 
1878   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1879      (cache_line_size > ContendedPaddingWidth))
1880      ContendedPaddingWidth = cache_line_size;
1881 
1882   // This machine allows unaligned memory accesses
1883   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1884     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1885   }
1886 
1887 #ifndef PRODUCT
1888   if (log_is_enabled(Info, os, cpu)) {
1889     LogStream ls(Log(os, cpu)::info());
1890     outputStream* log = &ls;
1891     log->print_cr("Logical CPUs per core: %u",
1892                   logical_processors_per_package());
1893     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1894     log->print("UseSSE=%d", UseSSE);
1895     if (UseAVX > 0) {
1896       log->print("  UseAVX=%d", UseAVX);
1897     }
1898     if (UseAES) {
1899       log->print("  UseAES=1");
1900     }
1901 #ifdef COMPILER2
1902     if (MaxVectorSize > 0) {
1903       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1904     }
1905 #endif
1906     log->cr();
1907     log->print("Allocation");
1908     if (AllocatePrefetchStyle <= 0) {
1909       log->print_cr(": no prefetching");
1910     } else {
1911       log->print(" prefetching: ");
1912       if (AllocatePrefetchInstr == 0) {
1913         log->print("PREFETCHNTA");
1914       } else if (AllocatePrefetchInstr == 1) {
1915         log->print("PREFETCHT0");
1916       } else if (AllocatePrefetchInstr == 2) {
1917         log->print("PREFETCHT2");
1918       } else if (AllocatePrefetchInstr == 3) {
1919         log->print("PREFETCHW");
1920       }
1921       if (AllocatePrefetchLines > 1) {
1922         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1923       } else {
1924         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1925       }
1926     }
1927 
1928     if (PrefetchCopyIntervalInBytes > 0) {
1929       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1930     }
1931     if (PrefetchScanIntervalInBytes > 0) {
1932       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1933     }
1934     if (ContendedPaddingWidth > 0) {
1935       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1936     }
1937   }
1938 #endif // !PRODUCT
1939   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1940       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1941   }
1942   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1943       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1944   }
1945 }
1946 
1947 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1948   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1949   if (vrt == XenHVM) {
1950     st->print_cr("Xen hardware-assisted virtualization detected");
1951   } else if (vrt == KVM) {
1952     st->print_cr("KVM virtualization detected");
1953   } else if (vrt == VMWare) {
1954     st->print_cr("VMWare virtualization detected");
1955     VirtualizationSupport::print_virtualization_info(st);
1956   } else if (vrt == HyperV) {
1957     st->print_cr("Hyper-V virtualization detected");
1958   } else if (vrt == HyperVRole) {
1959     st->print_cr("Hyper-V role detected");
1960   }
1961 }
1962 
// Returns true when the running CPU's family/model/stepping matches an entry
// in Intel's published Jump Conditional Code (JCC) erratum list (see the
// document cited below). Non-Intel and unlisted Intel CPUs return false.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2030 
2031 // On Xen, the cpuid instruction returns
2032 //  eax / registers[0]: Version of Xen
2033 //  ebx / registers[1]: chars 'XenV'
2034 //  ecx / registers[2]: chars 'MMXe'
2035 //  edx / registers[3]: chars 'nVMM'
2036 //
2037 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2038 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2039 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2040 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2041 //
2042 // more information :
2043 // https://kb.vmware.com/s/article/1009458
2044 //
2045 void VM_Version::check_virtualizations() {
2046   uint32_t registers[4] = {0};
2047   char signature[13] = {0};
2048 
2049   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2050   // from 0x40000000 until 0x40010000.
2051   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2052   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2053     detect_virt_stub(leaf, registers);
2054     memcpy(signature, &registers[1], 12);
2055 
2056     if (strncmp("VMwareVMware", signature, 12) == 0) {
2057       Abstract_VM_Version::_detected_virtualization = VMWare;
2058       // check for extended metrics from guestlib
2059       VirtualizationSupport::initialize();
2060     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2061       Abstract_VM_Version::_detected_virtualization = HyperV;
2062 #ifdef _WINDOWS
2063       // CPUID leaf 0x40000007 is available to the root partition only.
2064       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2065       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2066       detect_virt_stub(0x40000007, registers);
2067       if ((registers[0] != 0x0) ||
2068           (registers[1] != 0x0) ||
2069           (registers[2] != 0x0) ||
2070           (registers[3] != 0x0)) {
2071         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2072       }
2073 #endif
2074     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2075       Abstract_VM_Version::_detected_virtualization = KVM;
2076     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2077       Abstract_VM_Version::_detected_virtualization = XenHVM;
2078     }
2079   }
2080 }
2081 
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  // The user must not have touched the vectorization flags...
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  // ...AVX-512 must be in use, and the CPU must identify as Cascade Lake.
  return UseAVX > 2 && is_intel_cascade_lake();
}
#endif
2091 
2092 bool VM_Version::is_intel_cascade_lake() {
2093   return is_intel_skylake() && _stepping >= 5;
2094 }
2095 
2096 bool VM_Version::is_intel_darkmont() {
2097   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2098 }
2099 
2100 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2101 // for implementing the array copy and clear operations.
2102 // The Intel platforms that supports the serialize instruction
2103 // has improved implementation of 64-byte load/stores and so the default
2104 // threshold is set to 0 for these platforms.
2105 int VM_Version::avx3_threshold() {
2106   return (is_intel_server_family() &&
2107           supports_serialize() &&
2108           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2109 }
2110 
// Thin wrapper around the generated stub (installed in initialize() via
// VM_Version_StubGenerator::clear_apx_test_state()); presumably resets CPU
// state touched while probing APX support — confirm against the stub body.
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2114 
// Set to true at the end of VM_Version::initialize() below, once the CPU
// feature stubs have been generated and feature detection has run.
static bool _vm_version_initialized = false;
2116 
// Generate the CPU-probing stubs, run feature detection, and record any
// detected hypervisor. The statement order here is significant: the stub
// blob must exist before any stub is generated, and the stubs must exist
// before get_processor_features() runs them.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate each probe stub into the blob and keep a callable pointer.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                     g.generate_getCPUIDBrandString());
  // Runs the stubs and derives all feature flags / ergonomics defaults.
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2145 
// CPUID family numbers as reported in the family field of CPUID leaf 1.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Extended-function CPUID (leaf 0x80000001) EDX feature bits.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// CPUID leaf 1 EDX feature bits (bit n == 1 << n); bits 10 and 20 are
// reserved and therefore absent from this list.
typedef enum {
   FPU_FLAG     = 0x00000001, // bit 0
   VME_FLAG     = 0x00000002, // bit 1
   DE_FLAG      = 0x00000004, // bit 2
   PSE_FLAG     = 0x00000008, // bit 3
   TSC_FLAG     = 0x00000010, // bit 4
   MSR_FLAG     = 0x00000020, // bit 5
   PAE_FLAG     = 0x00000040, // bit 6
   MCE_FLAG     = 0x00000080, // bit 7
   CX8_FLAG     = 0x00000100, // bit 8
   APIC_FLAG    = 0x00000200, // bit 9
   SEP_FLAG     = 0x00000800, // bit 11
   MTRR_FLAG    = 0x00001000, // bit 12
   PGE_FLAG     = 0x00002000, // bit 13
   MCA_FLAG     = 0x00004000, // bit 14
   CMOV_FLAG    = 0x00008000, // bit 15
   PAT_FLAG     = 0x00010000, // bit 16
   PSE36_FLAG   = 0x00020000, // bit 17
   PSNUM_FLAG   = 0x00040000, // bit 18
   CLFLUSH_FLAG = 0x00080000, // bit 19
   DTS_FLAG     = 0x00200000, // bit 21
   ACPI_FLAG    = 0x00400000, // bit 22
   MMX_FLAG     = 0x00800000, // bit 23
   FXSR_FLAG    = 0x01000000, // bit 24
   SSE_FLAG     = 0x02000000, // bit 25
   SSE2_FLAG    = 0x04000000, // bit 26
   SS_FLAG      = 0x08000000, // bit 27
   HTT_FLAG     = 0x10000000, // bit 28
   TM_FLAG      = 0x20000000  // bit 29
} FeatureEdxFlag;
2191 
// VM_Version statics
// Sizes of the _family_id_intel / _family_id_amd name tables below.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

// 12 vendor-id characters plus NUL terminator.
const size_t VENDOR_LENGTH = 13;
// Extended brand string: 3 cpuid leaves * 4 registers * 4 bytes, plus NUL.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Lazily-allocated brand string cache (see cpu_brand_string()).
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2205 
// Intel family names, indexed by cpu family id (empty string = no name).
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2224 
// AMD family names, indexed by cpu family id (empty string = no name).
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Model names for the PentiumPro family (family 6), indexed by extended
// model number. Empty string = no specific name; the trailing nullptr is
// a sentinel that terminates lookups (see cpu_model_description()).
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2327 
/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
// Indexed by the CPUID brand id; nullptr terminates the table.
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2342 
2343 
// Human-readable names for CPUID leaf 1 EDX feature bits, indexed by bit
// number (empty string = reserved bit).
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2378 
// Human-readable names for extended CPUID (leaf 0x80000001) EDX feature
// bits, indexed by bit number (empty string = unnamed/reserved bit).
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2413 
// Human-readable names for CPUID leaf 1 ECX feature bits, indexed by bit
// number (empty string = unnamed/reserved bit).
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2448 
// Names for CPUID(0x80000001).ECX feature bits 0..31, in bit order.
// Mostly AMD-defined bits; unnamed bits are "".
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2483 
2484 const char* VM_Version::cpu_model_description(void) {
2485   uint32_t cpu_family = extended_cpu_family();
2486   uint32_t cpu_model = extended_cpu_model();
2487   const char* model = nullptr;
2488 
2489   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2490     for (uint32_t i = 0; i <= cpu_model; i++) {
2491       model = _model_id_pentium_pro[i];
2492       if (model == nullptr) {
2493         break;
2494       }
2495     }
2496   }
2497   return model;
2498 }
2499 
2500 const char* VM_Version::cpu_brand_string(void) {
2501   if (_cpu_brand_string == nullptr) {
2502     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2503     if (nullptr == _cpu_brand_string) {
2504       return nullptr;
2505     }
2506     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2507     if (ret_val != OS_OK) {
2508       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2509       _cpu_brand_string = nullptr;
2510     }
2511   }
2512   return _cpu_brand_string;
2513 }
2514 
2515 const char* VM_Version::cpu_brand(void) {
2516   const char*  brand  = nullptr;
2517 
2518   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2519     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2520     brand = _brand_id[0];
2521     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2522       brand = _brand_id[i];
2523     }
2524   }
2525   return brand;
2526 }
2527 
2528 bool VM_Version::cpu_is_em64t(void) {
2529   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2530 }
2531 
2532 bool VM_Version::is_netburst(void) {
2533   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2534 }
2535 
2536 bool VM_Version::supports_tscinv_ext(void) {
2537   if (!supports_tscinv_bit()) {
2538     return false;
2539   }
2540 
2541   if (is_intel()) {
2542     return true;
2543   }
2544 
2545   if (is_amd()) {
2546     return !is_amd_Barcelona();
2547   }
2548 
2549   if (is_hygon()) {
2550     return true;
2551   }
2552 
2553   return false;
2554 }
2555 
2556 void VM_Version::resolve_cpu_information_details(void) {
2557 
2558   // in future we want to base this information on proper cpu
2559   // and cache topology enumeration such as:
2560   // Intel 64 Architecture Processor Topology Enumeration
2561   // which supports system cpu and cache topology enumeration
2562   // either using 2xAPICIDs or initial APICIDs
2563 
2564   // currently only rough cpu information estimates
2565   // which will not necessarily reflect the exact configuration of the system
2566 
2567   // this is the number of logical hardware threads
2568   // visible to the operating system
2569   _no_of_threads = os::processor_count();
2570 
2571   // find out number of threads per cpu package
2572   int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2573   if (threads_per_package == 0) {
2574     // Fallback code to avoid div by zero in subsequent code.
2575     // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2576     threads_per_package = threads_per_core() * cores_per_cpu();
2577   }
2578 
2579   // use amount of threads visible to the process in order to guess number of sockets
2580   _no_of_sockets = _no_of_threads / threads_per_package;
2581 
2582   // process might only see a subset of the total number of threads
2583   // from a single processor package. Virtualization/resource management for example.
2584   // If so then just write a hard 1 as num of pkgs.
2585   if (0 == _no_of_sockets) {
2586     _no_of_sockets = 1;
2587   }
2588 
2589   // estimate the number of cores
2590   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2591 }
2592 
2593 
2594 const char* VM_Version::cpu_family_description(void) {
2595   int cpu_family_id = extended_cpu_family();
2596   if (is_amd()) {
2597     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2598       return _family_id_amd[cpu_family_id];
2599     }
2600   }
2601   if (is_intel()) {
2602     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2603       return cpu_model_description();
2604     }
2605     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2606       return _family_id_intel[cpu_family_id];
2607     }
2608   }
2609   if (is_hygon()) {
2610     return "Dhyana";
2611   }
2612   return "Unknown x86";
2613 }
2614 
2615 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2616   assert(buf != nullptr, "buffer is null!");
2617   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2618 
2619   const char* cpu_type = nullptr;
2620   const char* x64 = nullptr;
2621 
2622   if (is_intel()) {
2623     cpu_type = "Intel";
2624     x64 = cpu_is_em64t() ? " Intel64" : "";
2625   } else if (is_amd()) {
2626     cpu_type = "AMD";
2627     x64 = cpu_is_em64t() ? " AMD64" : "";
2628   } else if (is_hygon()) {
2629     cpu_type = "Hygon";
2630     x64 = cpu_is_em64t() ? " AMD64" : "";
2631   } else {
2632     cpu_type = "Unknown x86";
2633     x64 = cpu_is_em64t() ? " x86_64" : "";
2634   }
2635 
2636   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2637     cpu_type,
2638     cpu_family_description(),
2639     supports_ht() ? " (HT)" : "",
2640     supports_sse3() ? " SSE3" : "",
2641     supports_ssse3() ? " SSSE3" : "",
2642     supports_sse4_1() ? " SSE4.1" : "",
2643     supports_sse4_2() ? " SSE4.2" : "",
2644     supports_sse4a() ? " SSE4A" : "",
2645     is_netburst() ? " Netburst" : "",
2646     is_intel_family_core() ? " Core" : "",
2647     x64);
2648 
2649   return OS_OK;
2650 }
2651 
// Copies the 48-byte CPUID extended brand string (leaves 0x80000002..4)
// into buf. The stub stores the raw register values into
// _cpuid_info.proc_name_0..11; we reassemble them here 4 bytes at a time.
// The string is NUL-terminated by the hardware within those 48 bytes.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer
  // NOTE(review): these word stores assume buf is suitably aligned for
  // uint32_t accesses — true for the C-heap allocation in
  // cpu_brand_string(); confirm for any other caller.
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}
2676 
// Appends a comma-separated list of supported-feature names to buf by
// scanning the four CPUID feature words against the name tables above.
// Returns the number of characters written (buf_len - 1 on truncation).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t       written = 0;
  const char*  prefix = "";

// Appends prefix + string to buf; on snprintf failure reports the buffer
// as full. After the first successful write the prefix becomes ", ".
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // NOTE(review): all four loops stop at flag == 0x20000000, i.e. bits
  // 0..29 only — names at table indices 30/31 (e.g. "Pending Break
  // Enable") are never reported. Confirm this is intentional.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID(1).ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // CPUID(0x80000001).ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // CPUID(0x80000001).EDX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Features not expressible as a single CPUID(1)/CPUID(0x80000001) bit.
  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  if (supports_hybrid()) {
      WRITE_TO_BUF("Hybrid Architecture");
  }

  return written;
}
2737 
2738 /**
2739  * Write a detailed description of the cpu to a given buffer, including
2740  * feature set.
2741  */
2742 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2743   assert(buf != nullptr, "buffer is null!");
2744   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2745 
2746   static const char* unknown = "<unknown>";
2747   char               vendor_id[VENDOR_LENGTH];
2748   const char*        family = nullptr;
2749   const char*        model = nullptr;
2750   const char*        brand = nullptr;
2751   int                outputLen = 0;
2752 
2753   family = cpu_family_description();
2754   if (family == nullptr) {
2755     family = unknown;
2756   }
2757 
2758   model = cpu_model_description();
2759   if (model == nullptr) {
2760     model = unknown;
2761   }
2762 
2763   brand = cpu_brand_string();
2764 
2765   if (brand == nullptr) {
2766     brand = cpu_brand();
2767     if (brand == nullptr) {
2768       brand = unknown;
2769     }
2770   }
2771 
2772   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2773   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2774   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2775   vendor_id[VENDOR_LENGTH-1] = '\0';
2776 
2777   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2778     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2779     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2780     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2781     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2782     "Supports: ",
2783     brand,
2784     vendor_id,
2785     family,
2786     extended_cpu_family(),
2787     model,
2788     extended_cpu_model(),
2789     cpu_stepping(),
2790     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2791     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2792     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2793     _cpuid_info.std_cpuid1_eax.value,
2794     _cpuid_info.std_cpuid1_ebx.value,
2795     _cpuid_info.std_cpuid1_ecx.value,
2796     _cpuid_info.std_cpuid1_edx.value,
2797     _cpuid_info.ext_cpuid1_eax,
2798     _cpuid_info.ext_cpuid1_ebx,
2799     _cpuid_info.ext_cpuid1_ecx,
2800     _cpuid_info.ext_cpuid1_edx);
2801 
2802   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2803     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2804     return OS_ERR;
2805   }
2806 
2807   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2808 
2809   return OS_OK;
2810 }
2811 
2812 
2813 // Fill in Abstract_VM_Version statics
2814 void VM_Version::initialize_cpu_information() {
2815   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2816   assert(!_initialized, "shouldn't be initialized yet");
2817   resolve_cpu_information_details();
2818 
2819   // initialize cpu_name and cpu_desc
2820   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2821   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2822   _initialized = true;
2823 }
2824 
2825 /**
2826  *  For information about extracting the frequency from the cpu brand string, please see:
2827  *
2828  *    Intel Processor Identification and the CPUID Instruction
2829  *    Application Note 485
2830  *    May 2012
2831  *
2832  * The return value is the frequency in Hz.
2833  */
2834 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2835   const char* const brand_string = cpu_brand_string();
2836   if (brand_string == nullptr) {
2837     return 0;
2838   }
2839   const int64_t MEGA = 1000000;
2840   int64_t multiplier = 0;
2841   int64_t frequency = 0;
2842   uint8_t idx = 0;
2843   // The brand string buffer is at most 48 bytes.
2844   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2845   for (; idx < 48-2; ++idx) {
2846     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2847     // Search brand string for "yHz" where y is M, G, or T.
2848     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2849       if (brand_string[idx] == 'M') {
2850         multiplier = MEGA;
2851       } else if (brand_string[idx] == 'G') {
2852         multiplier = MEGA * 1000;
2853       } else if (brand_string[idx] == 'T') {
2854         multiplier = MEGA * MEGA;
2855       }
2856       break;
2857     }
2858   }
2859   if (multiplier > 0) {
2860     // Compute frequency (in Hz) from brand string.
2861     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2862       frequency =  (brand_string[idx-4] - '0') * multiplier;
2863       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2864       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2865     } else { // format is "xxxx"
2866       frequency =  (brand_string[idx-4] - '0') * 1000;
2867       frequency += (brand_string[idx-3] - '0') * 100;
2868       frequency += (brand_string[idx-2] - '0') * 10;
2869       frequency += (brand_string[idx-1] - '0');
2870       frequency *= multiplier;
2871     }
2872   }
2873   return frequency;
2874 }
2875 
2876 
2877 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2878   if (_max_qualified_cpu_frequency == 0) {
2879     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2880   }
2881   return _max_qualified_cpu_frequency;
2882 }
2883 
2884 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2885   VM_Features vm_features;
2886   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2887     vm_features.set_feature(CPU_CX8);
2888   if (std_cpuid1_edx.bits.cmov != 0)
2889     vm_features.set_feature(CPU_CMOV);
2890   if (std_cpuid1_edx.bits.clflush != 0)
2891     vm_features.set_feature(CPU_FLUSH);
2892   // clflush should always be available on x86_64
2893   // if not we are in real trouble because we rely on it
2894   // to flush the code cache.
2895   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2896   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2897       ext_cpuid1_edx.bits.fxsr != 0))
2898     vm_features.set_feature(CPU_FXSR);
2899   // HT flag is set for multi-core processors also.
2900   if (threads_per_core() > 1)
2901     vm_features.set_feature(CPU_HT);
2902   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2903       ext_cpuid1_edx.bits.mmx != 0))
2904     vm_features.set_feature(CPU_MMX);
2905   if (std_cpuid1_edx.bits.sse != 0)
2906     vm_features.set_feature(CPU_SSE);
2907   if (std_cpuid1_edx.bits.sse2 != 0)
2908     vm_features.set_feature(CPU_SSE2);
2909   if (std_cpuid1_ecx.bits.sse3 != 0)
2910     vm_features.set_feature(CPU_SSE3);
2911   if (std_cpuid1_ecx.bits.ssse3 != 0)
2912     vm_features.set_feature(CPU_SSSE3);
2913   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2914     vm_features.set_feature(CPU_SSE4_1);
2915   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2916     vm_features.set_feature(CPU_SSE4_2);
2917   if (std_cpuid1_ecx.bits.popcnt != 0)
2918     vm_features.set_feature(CPU_POPCNT);
2919   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2920       xem_xcr0_eax.bits.apx_f != 0 &&
2921       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2922     vm_features.set_feature(CPU_APX_F);
2923   }
2924   if (std_cpuid1_ecx.bits.avx != 0 &&
2925       std_cpuid1_ecx.bits.osxsave != 0 &&
2926       xem_xcr0_eax.bits.sse != 0 &&
2927       xem_xcr0_eax.bits.ymm != 0) {
2928     vm_features.set_feature(CPU_AVX);
2929     vm_features.set_feature(CPU_VZEROUPPER);
2930     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2931       vm_features.set_feature(CPU_SHA512);
2932     if (std_cpuid1_ecx.bits.f16c != 0)
2933       vm_features.set_feature(CPU_F16C);
2934     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2935       vm_features.set_feature(CPU_AVX2);
2936       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2937         vm_features.set_feature(CPU_AVX_IFMA);
2938     }
2939     if (sef_cpuid7_ecx.bits.gfni != 0)
2940         vm_features.set_feature(CPU_GFNI);
2941     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2942         xem_xcr0_eax.bits.opmask != 0 &&
2943         xem_xcr0_eax.bits.zmm512 != 0 &&
2944         xem_xcr0_eax.bits.zmm32 != 0) {
2945       vm_features.set_feature(CPU_AVX512F);
2946       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2947         vm_features.set_feature(CPU_AVX512CD);
2948       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2949         vm_features.set_feature(CPU_AVX512DQ);
2950       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2951         vm_features.set_feature(CPU_AVX512_IFMA);
2952       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2953         vm_features.set_feature(CPU_AVX512PF);
2954       if (sef_cpuid7_ebx.bits.avx512er != 0)
2955         vm_features.set_feature(CPU_AVX512ER);
2956       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2957         vm_features.set_feature(CPU_AVX512BW);
2958       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2959         vm_features.set_feature(CPU_AVX512VL);
2960       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2961         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2962       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2963         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2964       if (sef_cpuid7_ecx.bits.vaes != 0)
2965         vm_features.set_feature(CPU_AVX512_VAES);
2966       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2967         vm_features.set_feature(CPU_AVX512_VNNI);
2968       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2969         vm_features.set_feature(CPU_AVX512_BITALG);
2970       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2971         vm_features.set_feature(CPU_AVX512_VBMI);
2972       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2973         vm_features.set_feature(CPU_AVX512_VBMI2);
2974     }
2975     if (is_intel()) {
2976       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2977           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2978           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2979           xem_xcr0_eax.bits.opmask != 0 &&
2980           xem_xcr0_eax.bits.zmm512 != 0 &&
2981           xem_xcr0_eax.bits.zmm32 != 0) {
2982         vm_features.set_feature(CPU_AVX10_1);
2983         vm_features.set_feature(CPU_AVX512F);
2984         vm_features.set_feature(CPU_AVX512CD);
2985         vm_features.set_feature(CPU_AVX512DQ);
2986         vm_features.set_feature(CPU_AVX512PF);
2987         vm_features.set_feature(CPU_AVX512ER);
2988         vm_features.set_feature(CPU_AVX512BW);
2989         vm_features.set_feature(CPU_AVX512VL);
2990         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2991         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2992         vm_features.set_feature(CPU_AVX512_VAES);
2993         vm_features.set_feature(CPU_AVX512_VNNI);
2994         vm_features.set_feature(CPU_AVX512_BITALG);
2995         vm_features.set_feature(CPU_AVX512_VBMI);
2996         vm_features.set_feature(CPU_AVX512_VBMI2);
2997         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2998           vm_features.set_feature(CPU_AVX10_2);
2999         }
3000       }
3001     }
3002   }
3003 
3004   if (std_cpuid1_ecx.bits.hv != 0)
3005     vm_features.set_feature(CPU_HV);
3006   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3007     vm_features.set_feature(CPU_BMI1);
3008   if (std_cpuid1_edx.bits.tsc != 0)
3009     vm_features.set_feature(CPU_TSC);
3010   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3011     vm_features.set_feature(CPU_TSCINV_BIT);
3012   if (std_cpuid1_ecx.bits.aes != 0)
3013     vm_features.set_feature(CPU_AES);
3014   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3015     vm_features.set_feature(CPU_LZCNT);
3016   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3017     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3018   if (sef_cpuid7_ebx.bits.erms != 0)
3019     vm_features.set_feature(CPU_ERMS);
3020   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3021     vm_features.set_feature(CPU_FSRM);
3022   if (std_cpuid1_ecx.bits.clmul != 0)
3023     vm_features.set_feature(CPU_CLMUL);
3024   if (sef_cpuid7_ebx.bits.rtm != 0)
3025     vm_features.set_feature(CPU_RTM);
3026   if (sef_cpuid7_ebx.bits.adx != 0)
3027      vm_features.set_feature(CPU_ADX);
3028   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3029     vm_features.set_feature(CPU_BMI2);
3030   if (sef_cpuid7_ebx.bits.sha != 0)
3031     vm_features.set_feature(CPU_SHA);
3032   if (std_cpuid1_ecx.bits.fma != 0)
3033     vm_features.set_feature(CPU_FMA);
3034   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3035     vm_features.set_feature(CPU_FLUSHOPT);
3036   if (sef_cpuid7_ebx.bits.clwb != 0)
3037     vm_features.set_feature(CPU_CLWB);
3038   if (ext_cpuid1_edx.bits.rdtscp != 0)
3039     vm_features.set_feature(CPU_RDTSCP);
3040   if (sef_cpuid7_ecx.bits.rdpid != 0)
3041     vm_features.set_feature(CPU_RDPID);
3042 
3043   // AMD|Hygon additional features.
3044   if (is_amd_family()) {
3045     // PREFETCHW was checked above, check TDNOW here.
3046     if ((ext_cpuid1_edx.bits.tdnow != 0))
3047       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3048     if (ext_cpuid1_ecx.bits.sse4a != 0)
3049       vm_features.set_feature(CPU_SSE4A);
3050   }
3051 
3052   // Intel additional features.
3053   if (is_intel()) {
3054     if (sef_cpuid7_edx.bits.serialize != 0)
3055       vm_features.set_feature(CPU_SERIALIZE);
3056     if (sef_cpuid7_edx.bits.hybrid != 0)
3057       vm_features.set_feature(CPU_HYBRID);
3058     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3059       vm_features.set_feature(CPU_AVX512_FP16);
3060   }
3061 
3062   // ZX additional features.
3063   if (is_zx()) {
3064     // We do not know if these are supported by ZX, so we cannot trust
3065     // common CPUID bit for them.
3066     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3067     vm_features.clear_feature(CPU_CLWB);
3068   }
3069 
3070   // Protection key features.
3071   if (sef_cpuid7_ecx.bits.pku != 0) {
3072     vm_features.set_feature(CPU_PKU);
3073   }
3074   if (sef_cpuid7_ecx.bits.ospke != 0) {
3075     vm_features.set_feature(CPU_OSPKE);
3076   }
3077 
3078   // Control flow enforcement (CET) features.
3079   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3080     vm_features.set_feature(CPU_CET_SS);
3081   }
3082   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3083     vm_features.set_feature(CPU_CET_IBT);
3084   }
3085 
3086   // Composite features.
3087   if (supports_tscinv_bit() &&
3088       ((is_amd_family() && !is_amd_Barcelona()) ||
3089        is_intel_tsc_synched_at_init())) {
3090     vm_features.set_feature(CPU_TSCINV);
3091   }
3092   return vm_features;
3093 }
3094 
3095 bool VM_Version::os_supports_avx_vectors() {
3096   bool retVal = false;
3097   int nreg = 4;
3098   if (supports_evex()) {
3099     // Verify that OS save/restore all bits of EVEX registers
3100     // during signal processing.
3101     retVal = true;
3102     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3103       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3104         retVal = false;
3105         break;
3106       }
3107     }
3108   } else if (supports_avx()) {
3109     // Verify that OS save/restore all bits of AVX registers
3110     // during signal processing.
3111     retVal = true;
3112     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3113       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3114         retVal = false;
3115         break;
3116       }
3117     }
3118     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3119     if (retVal == false) {
3120       // Verify that OS save/restore all bits of EVEX registers
3121       // during signal processing.
3122       retVal = true;
3123       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3124         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3125           retVal = false;
3126           break;
3127         }
3128       }
3129     }
3130   }
3131   return retVal;
3132 }
3133 
3134 bool VM_Version::os_supports_apx_egprs() {
3135   if (!supports_apx_f()) {
3136     return false;
3137   }
3138   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3139       _cpuid_info.apx_save[1] != egpr_test_value()) {
3140     return false;
3141   }
3142   return true;
3143 }
3144 
3145 uint VM_Version::cores_per_cpu() {
3146   uint result = 1;
3147   if (is_intel()) {
3148     bool supports_topology = supports_processor_topology();
3149     if (supports_topology) {
3150       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3151                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3152     }
3153     if (!supports_topology || result == 0) {
3154       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3155     }
3156   } else if (is_amd_family()) {
3157     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3158     if (cpu_family() >= 0x17) { // Zen or later
3159       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3160     }
3161   } else if (is_zx()) {
3162     bool supports_topology = supports_processor_topology();
3163     if (supports_topology) {
3164       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3165                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3166     }
3167     if (!supports_topology || result == 0) {
3168       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3169     }
3170   }
3171   return result;
3172 }
3173 
3174 uint VM_Version::threads_per_core() {
3175   uint result = 1;
3176   if (is_intel() && supports_processor_topology()) {
3177     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3178   } else if (is_zx() && supports_processor_topology()) {
3179     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3180   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3181     if (cpu_family() >= 0x17) {
3182       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3183     } else {
3184       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3185                  cores_per_cpu();
3186     }
3187   }
3188   return (result == 0 ? 1 : result);
3189 }
3190 
3191 uint VM_Version::L1_line_size() {
3192   uint result = 0;
3193   if (is_intel()) {
3194     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3195   } else if (is_amd_family()) {
3196     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3197   } else if (is_zx()) {
3198     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3199   }
3200   if (result < 32) // not defined ?
3201     result = 32;   // 32 bytes by default on x86 and other x64
3202   return result;
3203 }
3204 
3205 bool VM_Version::is_intel_tsc_synched_at_init() {
3206   if (is_intel_family_core()) {
3207     uint32_t ext_model = extended_cpu_model();
3208     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3209         ext_model == CPU_MODEL_WESTMERE_EP    ||
3210         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3211         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3212       // <= 2-socket invariant tsc support. EX versions are usually used
3213       // in > 2-socket systems and likely don't synchronize tscs at
3214       // initialization.
3215       // Code that uses tsc values must be prepared for them to arbitrarily
3216       // jump forward or backward.
3217       return true;
3218     }
3219   }
3220   return false;
3221 }
3222 
3223 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3224   // Hardware prefetching (distance/size in bytes):
3225   // Pentium 3 -  64 /  32
3226   // Pentium 4 - 256 / 128
3227   // Athlon    -  64 /  32 ????
3228   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3229   // Core      - 128 /  64
3230   //
3231   // Software prefetching (distance in bytes / instruction with best score):
3232   // Pentium 3 - 128 / prefetchnta
3233   // Pentium 4 - 512 / prefetchnta
3234   // Athlon    - 128 / prefetchnta
3235   // Opteron   - 256 / prefetchnta
3236   // Core      - 256 / prefetchnta
3237   // It will be used only when AllocatePrefetchStyle > 0
3238 
3239   if (is_amd_family()) { // AMD | Hygon
3240     if (supports_sse2()) {
3241       return 256; // Opteron
3242     } else {
3243       return 128; // Athlon
3244     }
3245   } else { // Intel
3246     if (supports_sse3() && is_intel_server_family()) {
3247       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3248         return 192;
3249       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3250         return 384;
3251       }
3252     }
3253     if (supports_sse2()) {
3254       if (is_intel_server_family()) {
3255         return 256; // Pentium M, Core, Core2
3256       } else {
3257         return 512; // Pentium 4
3258       }
3259     } else {
3260       return 128; // Pentium 3 (and all other old CPUs)
3261     }
3262   }
3263 }
3264 
3265 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3266   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3267   switch (id) {
3268   case vmIntrinsics::_floatToFloat16:
3269   case vmIntrinsics::_float16ToFloat:
3270     if (!supports_float16()) {
3271       return false;
3272     }
3273     break;
3274   default:
3275     break;
3276   }
3277   return true;
3278 }
3279 
3280 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3281   int i = 0;
3282   ss.join([&]() {
3283     while (i < MAX_CPU_FEATURES) {
3284       if (_features.supports_feature((VM_Version::Feature_Flag)i)) {
3285         return _features_names[i++];
3286       }
3287       i += 1;
3288     }
3289     return (const char*)nullptr;
3290   }, ", ");
3291 }