1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_NAME
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = nullptr;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = nullptr;
  59 // Address of instruction which causes APX specific SEGV
  60 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
  61 // Address of instruction after the one which causes APX specific SEGV
  62 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
  63 
  64 static BufferBlob* stub_blob;
  65 static const int stub_size = 2550;
  66 
  67 int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;
  68 
  69 VM_Version::VM_Features VM_Version::_features;
  70 VM_Version::VM_Features VM_Version::_cpu_features;
  71 
  72 extern "C" {
  73   typedef void (*get_cpu_info_stub_t)(void*);
  74   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  75   typedef void (*clear_apx_test_state_t)(void);
  76   typedef void (*getCPUIDBrandString_stub_t)(void*);
  77 }
  78 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  79 static detect_virt_stub_t detect_virt_stub = nullptr;
  80 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  81 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
  82 
  83 bool VM_Version::supports_clflush() {
  84   // clflush should always be available on x86_64
  85   // if not we are in real trouble because we rely on it
  86   // to flush the code cache.
  87   // Unfortunately, Assembler::clflush is currently called as part
  88   // of generation of the code cache flush routine. This happens
  89   // under Universe::init before the processor features are set
  90   // up. Assembler::flush calls this routine to check that clflush
  91   // is allowed. So, we give the caller a free pass if Universe init
  92   // is still in progress.
  93   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  94   return true;
  95 }
  96 
  97 #define CPUID_STANDARD_FN   0x0
  98 #define CPUID_STANDARD_FN_1 0x1
  99 #define CPUID_STANDARD_FN_4 0x4
 100 #define CPUID_STANDARD_FN_B 0xb
 101 
 102 #define CPUID_EXTENDED_FN   0x80000000
 103 #define CPUID_EXTENDED_FN_1 0x80000001
 104 #define CPUID_EXTENDED_FN_2 0x80000002
 105 #define CPUID_EXTENDED_FN_3 0x80000003
 106 #define CPUID_EXTENDED_FN_4 0x80000004
 107 #define CPUID_EXTENDED_FN_7 0x80000007
 108 #define CPUID_EXTENDED_FN_8 0x80000008
 109 
 110 class VM_Version_StubGenerator: public StubCodeGenerator {
 111  public:
 112 
 113   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 114 
 115   address clear_apx_test_state() {
 116 #   define __ _masm->
 117     address start = __ pc();
 118     // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
 119     // handling guarantees that preserved register values post signal handling were
 120     // re-instantiated by operating system and not because they were not modified externally.
 121 
 122     bool save_apx = UseAPX;
 123     VM_Version::set_apx_cpuFeatures();
 124     UseAPX = true;
 125     // EGPR state save/restoration.
 126     __ mov64(r16, 0L);
 127     __ mov64(r31, 0L);
 128     UseAPX = save_apx;
 129     VM_Version::clean_cpuFeatures();
 130     __ ret(0);
 131     return start;
 132   }
 133 
 134   address generate_get_cpu_info() {
 135     // Flags to test CPU type.
 136     const uint32_t HS_EFL_AC = 0x40000;
 137     const uint32_t HS_EFL_ID = 0x200000;
 138     // Values for when we don't have a CPUID instruction.
 139     const int      CPU_FAMILY_SHIFT = 8;
 140     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 141     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 142     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 143 
 144     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
 145     Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
 146     Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
 147     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 148 
 149     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 150 #   define __ _masm->
 151 
 152     address start = __ pc();
 153 
 154     //
 155     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 156     //
 157     // rcx and rdx are first and second argument registers on windows
 158 
 159     __ push(rbp);
 160     __ mov(rbp, c_rarg0); // cpuid_info address
 161     __ push(rbx);
 162     __ push(rsi);
 163     __ pushf();          // preserve rbx, and flags
 164     __ pop(rax);
 165     __ push(rax);
 166     __ mov(rcx, rax);
 167     //
 168     // if we are unable to change the AC flag, we have a 386
 169     //
 170     __ xorl(rax, HS_EFL_AC);
 171     __ push(rax);
 172     __ popf();
 173     __ pushf();
 174     __ pop(rax);
 175     __ cmpptr(rax, rcx);
 176     __ jccb(Assembler::notEqual, detect_486);
 177 
 178     __ movl(rax, CPU_FAMILY_386);
 179     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 180     __ jmp(done);
 181 
 182     //
 183     // If we are unable to change the ID flag, we have a 486 which does
 184     // not support the "cpuid" instruction.
 185     //
 186     __ bind(detect_486);
 187     __ mov(rax, rcx);
 188     __ xorl(rax, HS_EFL_ID);
 189     __ push(rax);
 190     __ popf();
 191     __ pushf();
 192     __ pop(rax);
 193     __ cmpptr(rcx, rax);
 194     __ jccb(Assembler::notEqual, detect_586);
 195 
 196     __ bind(cpu486);
 197     __ movl(rax, CPU_FAMILY_486);
 198     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 199     __ jmp(done);
 200 
 201     //
 202     // At this point, we have a chip which supports the "cpuid" instruction
 203     //
 204     __ bind(detect_586);
 205     __ xorl(rax, rax);
 206     __ cpuid();
 207     __ orl(rax, rax);
 208     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 209                                         // value of at least 1, we give up and
 210                                         // assume a 486
 211     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 212     __ movl(Address(rsi, 0), rax);
 213     __ movl(Address(rsi, 4), rbx);
 214     __ movl(Address(rsi, 8), rcx);
 215     __ movl(Address(rsi,12), rdx);
 216 
 217     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 218     __ jccb(Assembler::belowEqual, std_cpuid4);
 219 
 220     //
 221     // cpuid(0xB) Processor Topology
 222     //
 223     __ movl(rax, 0xb);
 224     __ xorl(rcx, rcx);   // Threads level
 225     __ cpuid();
 226 
 227     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 228     __ movl(Address(rsi, 0), rax);
 229     __ movl(Address(rsi, 4), rbx);
 230     __ movl(Address(rsi, 8), rcx);
 231     __ movl(Address(rsi,12), rdx);
 232 
 233     __ movl(rax, 0xb);
 234     __ movl(rcx, 1);     // Cores level
 235     __ cpuid();
 236     __ push(rax);
 237     __ andl(rax, 0x1f);  // Determine if valid topology level
 238     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 239     __ andl(rax, 0xffff);
 240     __ pop(rax);
 241     __ jccb(Assembler::equal, std_cpuid4);
 242 
 243     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 244     __ movl(Address(rsi, 0), rax);
 245     __ movl(Address(rsi, 4), rbx);
 246     __ movl(Address(rsi, 8), rcx);
 247     __ movl(Address(rsi,12), rdx);
 248 
 249     __ movl(rax, 0xb);
 250     __ movl(rcx, 2);     // Packages level
 251     __ cpuid();
 252     __ push(rax);
 253     __ andl(rax, 0x1f);  // Determine if valid topology level
 254     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 255     __ andl(rax, 0xffff);
 256     __ pop(rax);
 257     __ jccb(Assembler::equal, std_cpuid4);
 258 
 259     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 260     __ movl(Address(rsi, 0), rax);
 261     __ movl(Address(rsi, 4), rbx);
 262     __ movl(Address(rsi, 8), rcx);
 263     __ movl(Address(rsi,12), rdx);
 264 
 265     //
 266     // cpuid(0x4) Deterministic cache params
 267     //
 268     __ bind(std_cpuid4);
 269     __ movl(rax, 4);
 270     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 271     __ jccb(Assembler::greater, std_cpuid1);
 272 
 273     __ xorl(rcx, rcx);   // L1 cache
 274     __ cpuid();
 275     __ push(rax);
 276     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 277     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 278     __ pop(rax);
 279     __ jccb(Assembler::equal, std_cpuid1);
 280 
 281     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 282     __ movl(Address(rsi, 0), rax);
 283     __ movl(Address(rsi, 4), rbx);
 284     __ movl(Address(rsi, 8), rcx);
 285     __ movl(Address(rsi,12), rdx);
 286 
 287     //
 288     // Standard cpuid(0x1)
 289     //
 290     __ bind(std_cpuid1);
 291     __ movl(rax, 1);
 292     __ cpuid();
 293     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 294     __ movl(Address(rsi, 0), rax);
 295     __ movl(Address(rsi, 4), rbx);
 296     __ movl(Address(rsi, 8), rcx);
 297     __ movl(Address(rsi,12), rdx);
 298 
 299     //
 300     // Check if OS has enabled XGETBV instruction to access XCR0
 301     // (OSXSAVE feature flag) and CPU supports AVX
 302     //
 303     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 304     __ cmpl(rcx, 0x18000000);
 305     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 306 
 307     //
 308     // XCR0, XFEATURE_ENABLED_MASK register
 309     //
 310     __ xorl(rcx, rcx);   // zero for XCR0 register
 311     __ xgetbv();
 312     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 313     __ movl(Address(rsi, 0), rax);
 314     __ movl(Address(rsi, 4), rdx);
 315 
 316     //
 317     // cpuid(0x7) Structured Extended Features Enumeration Leaf.
 318     //
 319     __ bind(sef_cpuid);
 320     __ movl(rax, 7);
 321     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 322     __ jccb(Assembler::greater, ext_cpuid);
 323     // ECX = 0
 324     __ xorl(rcx, rcx);
 325     __ cpuid();
 326     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 327     __ movl(Address(rsi, 0), rax);
 328     __ movl(Address(rsi, 4), rbx);
 329     __ movl(Address(rsi, 8), rcx);
 330     __ movl(Address(rsi, 12), rdx);
 331 
 332     //
 333     // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
 334     //
 335     __ bind(sefsl1_cpuid);
 336     __ movl(rax, 7);
 337     __ movl(rcx, 1);
 338     __ cpuid();
 339     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 340     __ movl(Address(rsi, 0), rax);
 341     __ movl(Address(rsi, 4), rdx);
 342 
 343     //
 344     // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
 345     //
 346     __ bind(std_cpuid29);
 347     __ movl(rax, 0x29);
 348     __ movl(rcx, 0);
 349     __ cpuid();
 350     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
 351     __ movl(Address(rsi, 0), rbx);
 352 
 353     //
 354     // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
 355     //
 356     __ bind(std_cpuid24);
 357     __ movl(rax, 0x24);
 358     __ movl(rcx, 0);
 359     __ cpuid();
 360     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
 361     __ movl(Address(rsi, 0), rax);
 362     __ movl(Address(rsi, 4), rbx);
 363 
 364     //
 365     // Extended cpuid(0x80000000)
 366     //
 367     __ bind(ext_cpuid);
 368     __ movl(rax, 0x80000000);
 369     __ cpuid();
 370     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 371     __ jcc(Assembler::belowEqual, done);
 372     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 373     __ jcc(Assembler::belowEqual, ext_cpuid1);
 374     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 375     __ jccb(Assembler::belowEqual, ext_cpuid5);
 376     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 377     __ jccb(Assembler::belowEqual, ext_cpuid7);
 378     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 379     __ jccb(Assembler::belowEqual, ext_cpuid8);
 380     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 381     __ jccb(Assembler::below, ext_cpuid8);
 382     //
 383     // Extended cpuid(0x8000001E)
 384     //
 385     __ movl(rax, 0x8000001E);
 386     __ cpuid();
 387     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 388     __ movl(Address(rsi, 0), rax);
 389     __ movl(Address(rsi, 4), rbx);
 390     __ movl(Address(rsi, 8), rcx);
 391     __ movl(Address(rsi,12), rdx);
 392 
 393     //
 394     // Extended cpuid(0x80000008)
 395     //
 396     __ bind(ext_cpuid8);
 397     __ movl(rax, 0x80000008);
 398     __ cpuid();
 399     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 400     __ movl(Address(rsi, 0), rax);
 401     __ movl(Address(rsi, 4), rbx);
 402     __ movl(Address(rsi, 8), rcx);
 403     __ movl(Address(rsi,12), rdx);
 404 
 405     //
 406     // Extended cpuid(0x80000007)
 407     //
 408     __ bind(ext_cpuid7);
 409     __ movl(rax, 0x80000007);
 410     __ cpuid();
 411     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 412     __ movl(Address(rsi, 0), rax);
 413     __ movl(Address(rsi, 4), rbx);
 414     __ movl(Address(rsi, 8), rcx);
 415     __ movl(Address(rsi,12), rdx);
 416 
 417     //
 418     // Extended cpuid(0x80000005)
 419     //
 420     __ bind(ext_cpuid5);
 421     __ movl(rax, 0x80000005);
 422     __ cpuid();
 423     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 424     __ movl(Address(rsi, 0), rax);
 425     __ movl(Address(rsi, 4), rbx);
 426     __ movl(Address(rsi, 8), rcx);
 427     __ movl(Address(rsi,12), rdx);
 428 
 429     //
 430     // Extended cpuid(0x80000001)
 431     //
 432     __ bind(ext_cpuid1);
 433     __ movl(rax, 0x80000001);
 434     __ cpuid();
 435     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 436     __ movl(Address(rsi, 0), rax);
 437     __ movl(Address(rsi, 4), rbx);
 438     __ movl(Address(rsi, 8), rcx);
 439     __ movl(Address(rsi,12), rdx);
 440 
 441     //
 442     // Check if OS has enabled XGETBV instruction to access XCR0
 443     // (OSXSAVE feature flag) and CPU supports APX
 444     //
 445     // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
 446     // and XCRO[19] bit for OS support to save/restore extended GPR state.
 447     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 448     __ movl(rax, 0x200000);
 449     __ andl(rax, Address(rsi, 4));
 450     __ jcc(Assembler::equal, vector_save_restore);
 451     // check _cpuid_info.xem_xcr0_eax.bits.apx_f
 452     __ movl(rax, 0x80000);
 453     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
 454     __ jcc(Assembler::equal, vector_save_restore);
 455 
 456     bool save_apx = UseAPX;
 457     VM_Version::set_apx_cpuFeatures();
 458     UseAPX = true;
 459     __ mov64(r16, VM_Version::egpr_test_value());
 460     __ mov64(r31, VM_Version::egpr_test_value());
 461     __ xorl(rsi, rsi);
 462     VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
 463     // Generate SEGV
 464     __ movl(rax, Address(rsi, 0));
 465 
 466     VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
 467     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
 468     __ movq(Address(rsi, 0), r16);
 469     __ movq(Address(rsi, 8), r31);
 470 
 471     UseAPX = save_apx;
 472     __ bind(vector_save_restore);
 473     //
 474     // Check if OS has enabled XGETBV instruction to access XCR0
 475     // (OSXSAVE feature flag) and CPU supports AVX
 476     //
 477     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 478     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 479     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 480     __ cmpl(rcx, 0x18000000);
 481     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 482 
 483     __ movl(rax, 0x6);
 484     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 485     __ cmpl(rax, 0x6);
 486     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 487 
 488     // we need to bridge farther than imm8, so we use this island as a thunk
 489     __ bind(done);
 490     __ jmp(wrapup);
 491 
 492     __ bind(start_simd_check);
 493     //
 494     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 495     // registers are not restored after a signal processing.
 496     // Generate SEGV here (reference through null)
 497     // and check upper YMM/ZMM bits after it.
 498     //
 499     int saved_useavx = UseAVX;
 500     int saved_usesse = UseSSE;
 501 
 502     // If UseAVX is uninitialized or is set by the user to include EVEX
 503     if (use_evex) {
 504       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 505       // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
 506       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 507       __ movl(rax, 0x10000);
 508       __ andl(rax, Address(rsi, 4));
 509       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 510       __ movl(rbx, 0x80000);
 511       __ andl(rbx, Address(rsi, 4));
 512       __ orl(rax, rbx);
 513       __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
 514       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 515       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 516       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 517       __ movl(rax, 0xE0);
 518       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 519       __ cmpl(rax, 0xE0);
 520       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 521 
 522       if (FLAG_IS_DEFAULT(UseAVX)) {
 523         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 524         __ movl(rax, Address(rsi, 0));
 525         __ cmpl(rax, 0x50654);              // If it is Skylake
 526         __ jcc(Assembler::equal, legacy_setup);
 527       }
 528       // EVEX setup: run in lowest evex mode
 529       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 530       UseAVX = 3;
 531       UseSSE = 2;
 532 #ifdef _WINDOWS
 533       // xmm5-xmm15 are not preserved by caller on windows
 534       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 535       __ subptr(rsp, 64);
 536       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 537       __ subptr(rsp, 64);
 538       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 539       __ subptr(rsp, 64);
 540       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 541 #endif // _WINDOWS
 542 
 543       // load value into all 64 bytes of zmm7 register
 544       __ movl(rcx, VM_Version::ymm_test_value());
 545       __ movdl(xmm0, rcx);
 546       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 547       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 548       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 549       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 550       VM_Version::clean_cpuFeatures();
 551       __ jmp(save_restore_except);
 552     }
 553 
 554     __ bind(legacy_setup);
 555     // AVX setup
 556     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 557     UseAVX = 1;
 558     UseSSE = 2;
 559 #ifdef _WINDOWS
 560     __ subptr(rsp, 32);
 561     __ vmovdqu(Address(rsp, 0), xmm7);
 562     __ subptr(rsp, 32);
 563     __ vmovdqu(Address(rsp, 0), xmm8);
 564     __ subptr(rsp, 32);
 565     __ vmovdqu(Address(rsp, 0), xmm15);
 566 #endif // _WINDOWS
 567 
 568     // load value into all 32 bytes of ymm7 register
 569     __ movl(rcx, VM_Version::ymm_test_value());
 570 
 571     __ movdl(xmm0, rcx);
 572     __ pshufd(xmm0, xmm0, 0x00);
 573     __ vinsertf128_high(xmm0, xmm0);
 574     __ vmovdqu(xmm7, xmm0);
 575     __ vmovdqu(xmm8, xmm0);
 576     __ vmovdqu(xmm15, xmm0);
 577     VM_Version::clean_cpuFeatures();
 578 
 579     __ bind(save_restore_except);
 580     __ xorl(rsi, rsi);
 581     VM_Version::set_cpuinfo_segv_addr(__ pc());
 582     // Generate SEGV
 583     __ movl(rax, Address(rsi, 0));
 584 
 585     VM_Version::set_cpuinfo_cont_addr(__ pc());
 586     // Returns here after signal. Save xmm0 to check it later.
 587 
 588     // If UseAVX is uninitialized or is set by the user to include EVEX
 589     if (use_evex) {
 590       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 591       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 592       __ movl(rax, 0x10000);
 593       __ andl(rax, Address(rsi, 4));
 594       __ jcc(Assembler::equal, legacy_save_restore);
 595       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 596       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 597       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 598       __ movl(rax, 0xE0);
 599       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 600       __ cmpl(rax, 0xE0);
 601       __ jcc(Assembler::notEqual, legacy_save_restore);
 602 
 603       if (FLAG_IS_DEFAULT(UseAVX)) {
 604         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 605         __ movl(rax, Address(rsi, 0));
 606         __ cmpl(rax, 0x50654);              // If it is Skylake
 607         __ jcc(Assembler::equal, legacy_save_restore);
 608       }
 609       // EVEX check: run in lowest evex mode
 610       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 611       UseAVX = 3;
 612       UseSSE = 2;
 613       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 614       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 615       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 616       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 617       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 618 
 619 #ifdef _WINDOWS
 620       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 621       __ addptr(rsp, 64);
 622       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 623       __ addptr(rsp, 64);
 624       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 625       __ addptr(rsp, 64);
 626 #endif // _WINDOWS
 627       generate_vzeroupper(wrapup);
 628       VM_Version::clean_cpuFeatures();
 629       UseAVX = saved_useavx;
 630       UseSSE = saved_usesse;
 631       __ jmp(wrapup);
 632    }
 633 
 634     __ bind(legacy_save_restore);
 635     // AVX check
 636     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 637     UseAVX = 1;
 638     UseSSE = 2;
 639     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 640     __ vmovdqu(Address(rsi, 0), xmm0);
 641     __ vmovdqu(Address(rsi, 32), xmm7);
 642     __ vmovdqu(Address(rsi, 64), xmm8);
 643     __ vmovdqu(Address(rsi, 96), xmm15);
 644 
 645 #ifdef _WINDOWS
 646     __ vmovdqu(xmm15, Address(rsp, 0));
 647     __ addptr(rsp, 32);
 648     __ vmovdqu(xmm8, Address(rsp, 0));
 649     __ addptr(rsp, 32);
 650     __ vmovdqu(xmm7, Address(rsp, 0));
 651     __ addptr(rsp, 32);
 652 #endif // _WINDOWS
 653 
 654     generate_vzeroupper(wrapup);
 655     VM_Version::clean_cpuFeatures();
 656     UseAVX = saved_useavx;
 657     UseSSE = saved_usesse;
 658 
 659     __ bind(wrapup);
 660     __ popf();
 661     __ pop(rsi);
 662     __ pop(rbx);
 663     __ pop(rbp);
 664     __ ret(0);
 665 
 666 #   undef __
 667 
 668     return start;
 669   };
 670   void generate_vzeroupper(Label& L_wrapup) {
 671 #   define __ _masm->
 672     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 673     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 674     __ jcc(Assembler::notEqual, L_wrapup);
 675     __ movl(rcx, 0x0FFF0FF0);
 676     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 677     __ andl(rcx, Address(rsi, 0));
 678     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 679     __ jcc(Assembler::equal, L_wrapup);
 680     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 681     __ jcc(Assembler::equal, L_wrapup);
 682     // vzeroupper() will use a pre-computed instruction sequence that we
 683     // can't compute until after we've determined CPU capabilities. Use
 684     // uncached variant here directly to be able to bootstrap correctly
 685     __ vzeroupper_uncached();
 686 #   undef __
 687   }
 688   address generate_detect_virt() {
 689     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 690 #   define __ _masm->
 691 
 692     address start = __ pc();
 693 
 694     // Evacuate callee-saved registers
 695     __ push(rbp);
 696     __ push(rbx);
 697     __ push(rsi); // for Windows
 698 
 699     __ mov(rax, c_rarg0); // CPUID leaf
 700     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 701 
 702     __ cpuid();
 703 
 704     // Store result to register array
 705     __ movl(Address(rsi,  0), rax);
 706     __ movl(Address(rsi,  4), rbx);
 707     __ movl(Address(rsi,  8), rcx);
 708     __ movl(Address(rsi, 12), rdx);
 709 
 710     // Epilogue
 711     __ pop(rsi);
 712     __ pop(rbx);
 713     __ pop(rbp);
 714     __ ret(0);
 715 
 716 #   undef __
 717 
 718     return start;
 719   };
 720 
 721 
 722   address generate_getCPUIDBrandString(void) {
 723     // Flags to test CPU type.
 724     const uint32_t HS_EFL_AC           = 0x40000;
 725     const uint32_t HS_EFL_ID           = 0x200000;
 726     // Values for when we don't have a CPUID instruction.
 727     const int      CPU_FAMILY_SHIFT = 8;
 728     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 729     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 730 
 731     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 732 
 733     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 734 #   define __ _masm->
 735 
 736     address start = __ pc();
 737 
 738     //
 739     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 740     //
 741     // rcx and rdx are first and second argument registers on windows
 742 
 743     __ push(rbp);
 744     __ mov(rbp, c_rarg0); // cpuid_info address
 745     __ push(rbx);
 746     __ push(rsi);
 747     __ pushf();          // preserve rbx, and flags
 748     __ pop(rax);
 749     __ push(rax);
 750     __ mov(rcx, rax);
 751     //
 752     // if we are unable to change the AC flag, we have a 386
 753     //
 754     __ xorl(rax, HS_EFL_AC);
 755     __ push(rax);
 756     __ popf();
 757     __ pushf();
 758     __ pop(rax);
 759     __ cmpptr(rax, rcx);
 760     __ jccb(Assembler::notEqual, detect_486);
 761 
 762     __ movl(rax, CPU_FAMILY_386);
 763     __ jmp(done);
 764 
 765     //
 766     // If we are unable to change the ID flag, we have a 486 which does
 767     // not support the "cpuid" instruction.
 768     //
 769     __ bind(detect_486);
 770     __ mov(rax, rcx);
 771     __ xorl(rax, HS_EFL_ID);
 772     __ push(rax);
 773     __ popf();
 774     __ pushf();
 775     __ pop(rax);
 776     __ cmpptr(rcx, rax);
 777     __ jccb(Assembler::notEqual, detect_586);
 778 
 779     __ bind(cpu486);
 780     __ movl(rax, CPU_FAMILY_486);
 781     __ jmp(done);
 782 
 783     //
 784     // At this point, we have a chip which supports the "cpuid" instruction
 785     //
 786     __ bind(detect_586);
 787     __ xorl(rax, rax);
 788     __ cpuid();
 789     __ orl(rax, rax);
 790     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 791                                         // value of at least 1, we give up and
 792                                         // assume a 486
 793 
 794     //
 795     // Extended cpuid(0x80000000) for processor brand string detection
 796     //
 797     __ bind(ext_cpuid);
 798     __ movl(rax, CPUID_EXTENDED_FN);
 799     __ cpuid();
 800     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 801     __ jcc(Assembler::below, done);
 802 
 803     //
 804     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 805     //
 806     __ movl(rax, CPUID_EXTENDED_FN_2);
 807     __ cpuid();
 808     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 809     __ movl(Address(rsi, 0), rax);
 810     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 811     __ movl(Address(rsi, 0), rbx);
 812     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 813     __ movl(Address(rsi, 0), rcx);
 814     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 815     __ movl(Address(rsi,0), rdx);
 816 
 817     //
 818     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 819     //
 820     __ movl(rax, CPUID_EXTENDED_FN_3);
 821     __ cpuid();
 822     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 823     __ movl(Address(rsi, 0), rax);
 824     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 825     __ movl(Address(rsi, 0), rbx);
 826     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 827     __ movl(Address(rsi, 0), rcx);
 828     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 829     __ movl(Address(rsi,0), rdx);
 830 
 831     //
 832     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 833     //
 834     __ movl(rax, CPUID_EXTENDED_FN_4);
 835     __ cpuid();
 836     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 837     __ movl(Address(rsi, 0), rax);
 838     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 839     __ movl(Address(rsi, 0), rbx);
 840     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 841     __ movl(Address(rsi, 0), rcx);
 842     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 843     __ movl(Address(rsi,0), rdx);
 844 
 845     //
 846     // return
 847     //
 848     __ bind(done);
 849     __ popf();
 850     __ pop(rsi);
 851     __ pop(rbx);
 852     __ pop(rbp);
 853     __ ret(0);
 854 
 855 #   undef __
 856 
 857     return start;
 858   };
 859 };
 860 
 861 void VM_Version::get_processor_features() {
 862 
 863   _cpu = 4; // 486 by default
 864   _model = 0;
 865   _stepping = 0;
 866   _logical_processors_per_package = 1;
 867   // i486 internal cache is both I&D and has a 16-byte line size
 868   _L1_data_cache_line_size = 16;
 869 
 870   // Get raw processor info
 871 
 872   get_cpu_info_stub(&_cpuid_info);
 873 
 874   assert_is_initialized();
 875   _cpu = extended_cpu_family();
 876   _model = extended_cpu_model();
 877   _stepping = cpu_stepping();
 878 
 879   if (cpu_family() > 4) { // it supports CPUID
 880     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 881     _cpu_features = _features; // Preserve features
 882     // Logical processors are only available on P4s and above,
 883     // and only if hyperthreading is available.
 884     _logical_processors_per_package = logical_processor_count();
 885     _L1_data_cache_line_size = L1_line_size();
 886   }
 887 
 888   // xchg and xadd instructions
 889   _supports_atomic_getset4 = true;
 890   _supports_atomic_getadd4 = true;
 891   _supports_atomic_getset8 = true;
 892   _supports_atomic_getadd8 = true;
 893 
 894   // OS should support SSE for x64 and hardware should support at least SSE2.
 895   if (!VM_Version::supports_sse2()) {
 896     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 897   }
 898   // in 64 bit the use of SSE2 is the minimum
 899   if (UseSSE < 2) UseSSE = 2;
 900 
 901   // flush_icache_stub have to be generated first.
 902   // That is why Icache line size is hard coded in ICache class,
 903   // see icache_x86.hpp. It is also the reason why we can't use
 904   // clflush instruction in 32-bit VM since it could be running
 905   // on CPU which does not support it.
 906   //
 907   // The only thing we can do is to verify that flushed
 908   // ICache::line_size has correct value.
 909   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 910   // clflush_size is size in quadwords (8 bytes).
 911   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 912 
 913   // assigning this field effectively enables Unsafe.writebackMemory()
 914   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 915   // that is only implemented on x86_64 and only if the OS plays ball
 916   if (os::supports_map_sync()) {
 917     // publish data cache line flush size to generic field, otherwise
 918     // let if default to zero thereby disabling writeback
 919     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 920   }
 921 
 922   // Check if processor has Intel Ecore
 923   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 924     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 925       _model == 0xCC || _model == 0xDD)) {
 926     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 927   }
 928 
 929   if (UseSSE < 4) {
 930     _features.clear_feature(CPU_SSE4_1);
 931     _features.clear_feature(CPU_SSE4_2);
 932   }
 933 
 934   if (UseSSE < 3) {
 935     _features.clear_feature(CPU_SSE3);
 936     _features.clear_feature(CPU_SSSE3);
 937     _features.clear_feature(CPU_SSE4A);
 938   }
 939 
 940   if (UseSSE < 2)
 941     _features.clear_feature(CPU_SSE2);
 942 
 943   if (UseSSE < 1)
 944     _features.clear_feature(CPU_SSE);
 945 
 946   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 947   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 948     UseAVX = 0;
 949   }
 950 
 951   // UseSSE is set to the smaller of what hardware supports and what
 952   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 953   // older Pentiums which do not support it.
 954   int use_sse_limit = 0;
 955   if (UseSSE > 0) {
 956     if (UseSSE > 3 && supports_sse4_1()) {
 957       use_sse_limit = 4;
 958     } else if (UseSSE > 2 && supports_sse3()) {
 959       use_sse_limit = 3;
 960     } else if (UseSSE > 1 && supports_sse2()) {
 961       use_sse_limit = 2;
 962     } else if (UseSSE > 0 && supports_sse()) {
 963       use_sse_limit = 1;
 964     } else {
 965       use_sse_limit = 0;
 966     }
 967   }
 968   if (FLAG_IS_DEFAULT(UseSSE)) {
 969     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 970   } else if (UseSSE > use_sse_limit) {
 971     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 972     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 973   }
 974 
 975   // first try initial setting and detect what we can support
 976   int use_avx_limit = 0;
 977   if (UseAVX > 0) {
 978     if (UseSSE < 4) {
 979       // Don't use AVX if SSE is unavailable or has been disabled.
 980       use_avx_limit = 0;
 981     } else if (UseAVX > 2 && supports_evex()) {
 982       use_avx_limit = 3;
 983     } else if (UseAVX > 1 && supports_avx2()) {
 984       use_avx_limit = 2;
 985     } else if (UseAVX > 0 && supports_avx()) {
 986       use_avx_limit = 1;
 987     } else {
 988       use_avx_limit = 0;
 989     }
 990   }
 991   if (FLAG_IS_DEFAULT(UseAVX)) {
 992     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 993     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 994       FLAG_SET_DEFAULT(UseAVX, 2);
 995     } else {
 996       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 997     }
 998   }
 999 
1000   if (UseAVX > use_avx_limit) {
1001     if (UseSSE < 4) {
1002       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1003     } else {
1004       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1005     }
1006     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1007   }
1008 
1009   if (UseAVX < 3) {
1010     _features.clear_feature(CPU_AVX512F);
1011     _features.clear_feature(CPU_AVX512DQ);
1012     _features.clear_feature(CPU_AVX512CD);
1013     _features.clear_feature(CPU_AVX512BW);
1014     _features.clear_feature(CPU_AVX512ER);
1015     _features.clear_feature(CPU_AVX512PF);
1016     _features.clear_feature(CPU_AVX512VL);
1017     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1018     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1019     _features.clear_feature(CPU_AVX512_VAES);
1020     _features.clear_feature(CPU_AVX512_VNNI);
1021     _features.clear_feature(CPU_AVX512_VBMI);
1022     _features.clear_feature(CPU_AVX512_VBMI2);
1023     _features.clear_feature(CPU_AVX512_BITALG);
1024     _features.clear_feature(CPU_AVX512_IFMA);
1025     _features.clear_feature(CPU_APX_F);
1026     _features.clear_feature(CPU_AVX512_FP16);
1027     _features.clear_feature(CPU_AVX10_1);
1028     _features.clear_feature(CPU_AVX10_2);
1029   }
1030 
1031 
1032   if (UseAVX < 2) {
1033     _features.clear_feature(CPU_AVX2);
1034     _features.clear_feature(CPU_AVX_IFMA);
1035   }
1036 
1037   if (UseAVX < 1) {
1038     _features.clear_feature(CPU_AVX);
1039     _features.clear_feature(CPU_VZEROUPPER);
1040     _features.clear_feature(CPU_F16C);
1041     _features.clear_feature(CPU_SHA512);
1042   }
1043 
1044   if (logical_processors_per_package() == 1) {
1045     // HT processor could be installed on a system which doesn't support HT.
1046     _features.clear_feature(CPU_HT);
1047   }
1048 
1049   if (is_intel()) { // Intel cpus specific settings
1050     if (is_knights_family()) {
1051       _features.clear_feature(CPU_VZEROUPPER);
1052       _features.clear_feature(CPU_AVX512BW);
1053       _features.clear_feature(CPU_AVX512VL);
1054       _features.clear_feature(CPU_APX_F);
1055       _features.clear_feature(CPU_AVX512DQ);
1056       _features.clear_feature(CPU_AVX512_VNNI);
1057       _features.clear_feature(CPU_AVX512_VAES);
1058       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1059       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1060       _features.clear_feature(CPU_AVX512_VBMI);
1061       _features.clear_feature(CPU_AVX512_VBMI2);
1062       _features.clear_feature(CPU_CLWB);
1063       _features.clear_feature(CPU_FLUSHOPT);
1064       _features.clear_feature(CPU_GFNI);
1065       _features.clear_feature(CPU_AVX512_BITALG);
1066       _features.clear_feature(CPU_AVX512_IFMA);
1067       _features.clear_feature(CPU_AVX_IFMA);
1068       _features.clear_feature(CPU_AVX512_FP16);
1069       _features.clear_feature(CPU_AVX10_1);
1070       _features.clear_feature(CPU_AVX10_2);
1071     }
1072   }
1073 
1074     // Currently APX support is only enabled for targets supporting AVX512VL feature.
1075   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1076   if (UseAPX && !apx_supported) {
1077     warning("UseAPX is not supported on this CPU, setting it to false");
1078     FLAG_SET_DEFAULT(UseAPX, false);
1079   }
1080 
1081   if (!UseAPX) {
1082     _features.clear_feature(CPU_APX_F);
1083   }
1084 
1085   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1086     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1087     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1088   } else {
1089     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1090   }
1091 
1092   assert(supports_clflush(), "Always present");
1093   if (X86ICacheSync == -1) {
1094     // Auto-detect, choosing the best performant one that still flushes
1095     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1096     if (supports_clwb()) {
1097       FLAG_SET_ERGO(X86ICacheSync, 3);
1098     } else if (supports_clflushopt()) {
1099       FLAG_SET_ERGO(X86ICacheSync, 2);
1100     } else {
1101       FLAG_SET_ERGO(X86ICacheSync, 1);
1102     }
1103   } else {
1104     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1105       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1106     }
1107     if ((X86ICacheSync == 3) && !supports_clwb()) {
1108       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1109     }
1110     if ((X86ICacheSync == 5) && !supports_serialize()) {
1111       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1112     }
1113   }
1114 
1115   stringStream ss(2048);
1116   if (supports_hybrid()) {
1117     ss.print("(hybrid)");
1118   } else {
1119     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1120   }
1121   ss.print(" family %d model %d stepping %d microcode 0x%x",
1122            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1123   ss.print(", ");
1124   int features_offset = (int)ss.size();
1125   insert_features_names(_features, ss);
1126 
1127   _cpu_info_string = ss.as_string(true);
1128   _features_string = _cpu_info_string + features_offset;
1129 
1130   // Use AES instructions if available.
1131   if (supports_aes()) {
1132     if (FLAG_IS_DEFAULT(UseAES)) {
1133       FLAG_SET_DEFAULT(UseAES, true);
1134     }
1135     if (!UseAES) {
1136       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1137         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1138       }
1139       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1140     } else {
1141       if (UseSSE > 2) {
1142         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1143           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1144         }
1145       } else {
1146         // The AES intrinsic stubs require AES instruction support (of course)
1147         // but also require sse3 mode or higher for instructions it use.
1148         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1149           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1150         }
1151         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1152       }
1153 
1154       // --AES-CTR begins--
1155       if (!UseAESIntrinsics) {
1156         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1157           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1158           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1159         }
1160       } else {
1161         if (supports_sse4_1()) {
1162           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1163             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1164           }
1165         } else {
1166            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1167            // but also require sse4.1 mode or higher for instructions it use.
1168           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1169              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1170            }
1171            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1172         }
1173       }
1174       // --AES-CTR ends--
1175     }
1176   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1177     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1178       warning("AES instructions are not available on this CPU");
1179       FLAG_SET_DEFAULT(UseAES, false);
1180     }
1181     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1182       warning("AES intrinsics are not available on this CPU");
1183       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1184     }
1185     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1186       warning("AES-CTR intrinsics are not available on this CPU");
1187       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1188     }
1189   }
1190 
1191   // Use CLMUL instructions if available.
1192   if (supports_clmul()) {
1193     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1194       UseCLMUL = true;
1195     }
1196   } else if (UseCLMUL) {
1197     if (!FLAG_IS_DEFAULT(UseCLMUL))
1198       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1199     FLAG_SET_DEFAULT(UseCLMUL, false);
1200   }
1201 
1202   if (UseCLMUL && (UseSSE > 2)) {
1203     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1204       UseCRC32Intrinsics = true;
1205     }
1206   } else if (UseCRC32Intrinsics) {
1207     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1208       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1209     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1210   }
1211 
1212   if (supports_avx2()) {
1213     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1214       UseAdler32Intrinsics = true;
1215     }
1216   } else if (UseAdler32Intrinsics) {
1217     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1218       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1219     }
1220     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1221   }
1222 
1223   if (supports_sse4_2() && supports_clmul()) {
1224     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1225       UseCRC32CIntrinsics = true;
1226     }
1227   } else if (UseCRC32CIntrinsics) {
1228     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1229       warning("CRC32C intrinsics are not available on this CPU");
1230     }
1231     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1232   }
1233 
1234   // GHASH/GCM intrinsics
1235   if (UseCLMUL && (UseSSE > 2)) {
1236     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1237       UseGHASHIntrinsics = true;
1238     }
1239   } else if (UseGHASHIntrinsics) {
1240     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1241       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1242     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1243   }
1244 
1245   // ChaCha20 Intrinsics
1246   // As long as the system supports AVX as a baseline we can do a
1247   // SIMD-enabled block function.  StubGenerator makes the determination
1248   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1249   // version.
1250   if (UseAVX >= 1) {
1251       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1252           UseChaCha20Intrinsics = true;
1253       }
1254   } else if (UseChaCha20Intrinsics) {
1255       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1256           warning("ChaCha20 intrinsic requires AVX instructions");
1257       }
1258       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1259   }
1260 
1261   // Kyber Intrinsics
1262   // Currently we only have them for AVX512
1263 #ifdef _LP64
1264   if (supports_evex() && supports_avx512bw()) {
1265       if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1266           UseKyberIntrinsics = true;
1267       }
1268   } else
1269 #endif
1270   if (UseKyberIntrinsics) {
1271      warning("Intrinsics for ML-KEM are not available on this CPU.");
1272      FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1273   }
1274 
1275   // Dilithium Intrinsics
1276   // Currently we only have them for AVX512
1277   if (supports_evex() && supports_avx512bw()) {
1278       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1279           UseDilithiumIntrinsics = true;
1280       }
1281   } else if (UseDilithiumIntrinsics) {
1282       warning("Intrinsics for ML-DSA are not available on this CPU.");
1283       FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1284   }
1285 
1286   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1287   if (UseAVX >= 2) {
1288     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1289       UseBASE64Intrinsics = true;
1290     }
1291   } else if (UseBASE64Intrinsics) {
1292      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1293       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1294     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1295   }
1296 
1297   if (supports_fma()) {
1298     if (FLAG_IS_DEFAULT(UseFMA)) {
1299       UseFMA = true;
1300     }
1301   } else if (UseFMA) {
1302     warning("FMA instructions are not available on this CPU");
1303     FLAG_SET_DEFAULT(UseFMA, false);
1304   }
1305 
1306   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1307     UseMD5Intrinsics = true;
1308   }
1309 
1310   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1311     if (FLAG_IS_DEFAULT(UseSHA)) {
1312       UseSHA = true;
1313     }
1314   } else if (UseSHA) {
1315     warning("SHA instructions are not available on this CPU");
1316     FLAG_SET_DEFAULT(UseSHA, false);
1317   }
1318 
1319   if (supports_sha() && supports_sse4_1() && UseSHA) {
1320     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1321       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1322     }
1323   } else if (UseSHA1Intrinsics) {
1324     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1325     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1326   }
1327 
1328   if (supports_sse4_1() && UseSHA) {
1329     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1330       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1331     }
1332   } else if (UseSHA256Intrinsics) {
1333     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1334     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1335   }
1336 
1337   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1338     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1339       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1340     }
1341   } else if (UseSHA512Intrinsics) {
1342     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1343     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1344   }
1345 
1346   if (supports_evex() && supports_avx512bw()) {
1347       if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1348           UseSHA3Intrinsics = true;
1349       }
1350   } else if (UseSHA3Intrinsics) {
1351       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1352       FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1353   }
1354 
1355   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1356     FLAG_SET_DEFAULT(UseSHA, false);
1357   }
1358 
1359 #if COMPILER2_OR_JVMCI
1360   int max_vector_size = 0;
1361   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1362     // 16 byte vectors (in XMM) are supported with SSE2+
1363     max_vector_size = 16;
1364   } else if (UseAVX == 1 || UseAVX == 2) {
1365     // 32 bytes vectors (in YMM) are only supported with AVX+
1366     max_vector_size = 32;
1367   } else if (UseAVX > 2) {
1368     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1369     max_vector_size = 64;
1370   }
1371 
1372   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1373 
1374   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1375     if (MaxVectorSize < min_vector_size) {
1376       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1377       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1378     }
1379     if (MaxVectorSize > max_vector_size) {
1380       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1381       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1382     }
1383     if (!is_power_of_2(MaxVectorSize)) {
1384       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1385       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1386     }
1387   } else {
1388     // If default, use highest supported configuration
1389     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1390   }
1391 
1392 #if defined(COMPILER2) && defined(ASSERT)
1393   if (MaxVectorSize > 0) {
1394     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1395       tty->print_cr("State of YMM registers after signal handle:");
1396       int nreg = 4;
1397       const char* ymm_name[4] = {"0", "7", "8", "15"};
1398       for (int i = 0; i < nreg; i++) {
1399         tty->print("YMM%s:", ymm_name[i]);
1400         for (int j = 7; j >=0; j--) {
1401           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1402         }
1403         tty->cr();
1404       }
1405     }
1406   }
1407 #endif // COMPILER2 && ASSERT
1408 
1409   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1410     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1411       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1412     }
1413   } else if (UsePoly1305Intrinsics) {
1414     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1415     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1416   }
1417 
1418   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1419     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1420       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1421     }
1422   } else if (UseIntPolyIntrinsics) {
1423     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1424     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1425   }
1426 
1427   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1428     UseMultiplyToLenIntrinsic = true;
1429   }
1430   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1431     UseSquareToLenIntrinsic = true;
1432   }
1433   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1434     UseMulAddIntrinsic = true;
1435   }
1436   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1437     UseMontgomeryMultiplyIntrinsic = true;
1438   }
1439   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1440     UseMontgomerySquareIntrinsic = true;
1441   }
1442 #endif // COMPILER2_OR_JVMCI
1443 
1444   // On new cpus instructions which update whole XMM register should be used
1445   // to prevent partial register stall due to dependencies on high half.
1446   //
1447   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1448   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1449   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1450   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1451 
1452 
1453   if (is_zx()) { // ZX cpus specific settings
1454     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1455       UseStoreImmI16 = false; // don't use it on ZX cpus
1456     }
1457     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1458       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1459         // Use it on all ZX cpus
1460         UseAddressNop = true;
1461       }
1462     }
1463     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1464       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1465     }
1466     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1467       if (supports_sse3()) {
1468         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1469       } else {
1470         UseXmmRegToRegMoveAll = false;
1471       }
1472     }
1473     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1474 #ifdef COMPILER2
1475       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1476         // For new ZX cpus do the next optimization:
1477         // don't align the beginning of a loop if there are enough instructions
1478         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1479         // in current fetch line (OptoLoopAlignment) or the padding
1480         // is big (> MaxLoopPad).
1481         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1482         // generated NOP instructions. 11 is the largest size of one
1483         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1484         MaxLoopPad = 11;
1485       }
1486 #endif // COMPILER2
1487       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1488         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1489       }
1490       if (supports_sse4_2()) { // new ZX cpus
1491         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1492           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1493         }
1494       }
1495     }
1496 
1497     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1498       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1499     }
1500   }
1501 
1502   if (is_amd_family()) { // AMD cpus specific settings
1503     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1504       // Use it on new AMD cpus starting from Opteron.
1505       UseAddressNop = true;
1506     }
1507     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1508       // Use it on new AMD cpus starting from Opteron.
1509       UseNewLongLShift = true;
1510     }
1511     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1512       if (supports_sse4a()) {
1513         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1514       } else {
1515         UseXmmLoadAndClearUpper = false;
1516       }
1517     }
1518     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1519       if (supports_sse4a()) {
1520         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1521       } else {
1522         UseXmmRegToRegMoveAll = false;
1523       }
1524     }
1525     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1526       if (supports_sse4a()) {
1527         UseXmmI2F = true;
1528       } else {
1529         UseXmmI2F = false;
1530       }
1531     }
1532     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1533       if (supports_sse4a()) {
1534         UseXmmI2D = true;
1535       } else {
1536         UseXmmI2D = false;
1537       }
1538     }
1539 
1540     // some defaults for AMD family 15h
1541     if (cpu_family() == 0x15) {
1542       // On family 15h processors default is no sw prefetch
1543       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1544         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1545       }
1546       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1547       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1548         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1549       }
1550       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1551       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1552         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1553       }
1554       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1555         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1556       }
1557     }
1558 
1559 #ifdef COMPILER2
1560     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1561       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1562       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1563     }
1564 #endif // COMPILER2
1565 
1566     // Some defaults for AMD family >= 17h && Hygon family 18h
1567     if (cpu_family() >= 0x17) {
1568       // On family >=17h processors use XMM and UnalignedLoadStores
1569       // for Array Copy
1570       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1571         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1572       }
1573       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1574         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1575       }
1576 #ifdef COMPILER2
1577       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1578         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1579       }
1580 #endif
1581     }
1582   }
1583 
1584   if (is_intel()) { // Intel cpus specific settings
1585     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1586       UseStoreImmI16 = false; // don't use it on Intel cpus
1587     }
1588     if (is_intel_server_family() || cpu_family() == 15) {
1589       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1590         // Use it on all Intel cpus starting from PentiumPro
1591         UseAddressNop = true;
1592       }
1593     }
1594     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1595       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1596     }
1597     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1598       if (supports_sse3()) {
1599         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1600       } else {
1601         UseXmmRegToRegMoveAll = false;
1602       }
1603     }
1604     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1605 #ifdef COMPILER2
1606       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1607         // For new Intel cpus do the next optimization:
1608         // don't align the beginning of a loop if there are enough instructions
1609         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1610         // in current fetch line (OptoLoopAlignment) or the padding
1611         // is big (> MaxLoopPad).
1612         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1613         // generated NOP instructions. 11 is the largest size of one
1614         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1615         MaxLoopPad = 11;
1616       }
1617 #endif // COMPILER2
1618 
1619       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1620         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1621       }
1622       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1623         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1624           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1625         }
1626       }
1627     }
1628     if (is_atom_family() || is_knights_family()) {
1629 #ifdef COMPILER2
1630       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1631         OptoScheduling = true;
1632       }
1633 #endif
1634       if (supports_sse4_2()) { // Silvermont
1635         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1636           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1637         }
1638       }
1639       if (FLAG_IS_DEFAULT(UseIncDec)) {
1640         FLAG_SET_DEFAULT(UseIncDec, false);
1641       }
1642     }
1643     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1644       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1645     }
1646 #ifdef COMPILER2
1647     if (UseAVX > 2) {
1648       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1649           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1650            ArrayOperationPartialInlineSize != 0 &&
1651            ArrayOperationPartialInlineSize != 16 &&
1652            ArrayOperationPartialInlineSize != 32 &&
1653            ArrayOperationPartialInlineSize != 64)) {
1654         int inline_size = 0;
1655         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1656           inline_size = 64;
1657         } else if (MaxVectorSize >= 32) {
1658           inline_size = 32;
1659         } else if (MaxVectorSize >= 16) {
1660           inline_size = 16;
1661         }
1662         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1663           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1664         }
1665         ArrayOperationPartialInlineSize = inline_size;
1666       }
1667 
1668       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1669         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1670         if (ArrayOperationPartialInlineSize) {
1671           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1672         } else {
1673           warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1674         }
1675       }
1676     }
1677 #endif
1678   }
1679 
1680 #ifdef COMPILER2
1681   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1682     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1683       OptimizeFill = false;
1684     }
1685   }
1686 #endif
1687   if (supports_sse4_2()) {
1688     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1689       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1690     }
1691   } else {
1692     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1693       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1694     }
1695     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1696   }
1697   if (UseSSE42Intrinsics) {
1698     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1699       UseVectorizedMismatchIntrinsic = true;
1700     }
1701   } else if (UseVectorizedMismatchIntrinsic) {
1702     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1703       warning("vectorizedMismatch intrinsics are not available on this CPU");
1704     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1705   }
1706   if (UseAVX >= 2) {
1707     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1708   } else if (UseVectorizedHashCodeIntrinsic) {
1709     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1710       warning("vectorizedHashCode intrinsics are not available on this CPU");
1711     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1712   }
1713 
1714   // Use count leading zeros count instruction if available.
1715   if (supports_lzcnt()) {
1716     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1717       UseCountLeadingZerosInstruction = true;
1718     }
1719    } else if (UseCountLeadingZerosInstruction) {
1720     warning("lzcnt instruction is not available on this CPU");
1721     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1722   }
1723 
1724   // Use count trailing zeros instruction if available
1725   if (supports_bmi1()) {
1726     // tzcnt does not require VEX prefix
1727     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1728       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1729         // Don't use tzcnt if BMI1 is switched off on command line.
1730         UseCountTrailingZerosInstruction = false;
1731       } else {
1732         UseCountTrailingZerosInstruction = true;
1733       }
1734     }
1735   } else if (UseCountTrailingZerosInstruction) {
1736     warning("tzcnt instruction is not available on this CPU");
1737     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1738   }
1739 
1740   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1741   // VEX prefix is generated only when AVX > 0.
1742   if (supports_bmi1() && supports_avx()) {
1743     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1744       UseBMI1Instructions = true;
1745     }
1746   } else if (UseBMI1Instructions) {
1747     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1748     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1749   }
1750 
1751   if (supports_bmi2() && supports_avx()) {
1752     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1753       UseBMI2Instructions = true;
1754     }
1755   } else if (UseBMI2Instructions) {
1756     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1757     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1758   }
1759 
1760   // Use population count instruction if available.
1761   if (supports_popcnt()) {
1762     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1763       UsePopCountInstruction = true;
1764     }
1765   } else if (UsePopCountInstruction) {
1766     warning("POPCNT instruction is not available on this CPU");
1767     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1768   }
1769 
1770   // Use fast-string operations if available.
1771   if (supports_erms()) {
1772     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1773       UseFastStosb = true;
1774     }
1775   } else if (UseFastStosb) {
1776     warning("fast-string operations are not available on this CPU");
1777     FLAG_SET_DEFAULT(UseFastStosb, false);
1778   }
1779 
1780   // For AMD Processors use XMM/YMM MOVDQU instructions
1781   // for Object Initialization as default
1782   if (is_amd() && cpu_family() >= 0x19) {
1783     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1784       UseFastStosb = false;
1785     }
1786   }
1787 
1788 #ifdef COMPILER2
1789   if (is_intel() && MaxVectorSize > 16) {
1790     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1791       UseFastStosb = false;
1792     }
1793   }
1794 #endif
1795 
1796   // Use XMM/YMM MOVDQU instruction for Object Initialization
1797   if (!UseFastStosb && UseUnalignedLoadStores) {
1798     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1799       UseXMMForObjInit = true;
1800     }
1801   } else if (UseXMMForObjInit) {
1802     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1803     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1804   }
1805 
1806 #ifdef COMPILER2
1807   if (FLAG_IS_DEFAULT(AlignVector)) {
1808     // Modern processors allow misaligned memory operations for vectors.
1809     AlignVector = !UseUnalignedLoadStores;
1810   }
1811 #endif // COMPILER2
1812 
1813   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1814     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1815       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1816     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1817       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1818     }
1819   }
1820 
1821   // Allocation prefetch settings
1822   int cache_line_size = checked_cast<int>(prefetch_data_size());
1823   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1824       (cache_line_size > AllocatePrefetchStepSize)) {
1825     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1826   }
1827 
1828   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1829     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1830     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1831       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1832     }
1833     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1834   }
1835 
1836   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1837     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1838     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1839   }
1840 
1841   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1842     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1843         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1844       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1845     }
1846 #ifdef COMPILER2
1847     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1848       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1849     }
1850 #endif
1851   }
1852 
1853   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1854 #ifdef COMPILER2
1855     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1856       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1857     }
1858 #endif
1859   }
1860 
1861   // Prefetch settings
1862 
1863   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1864   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1865   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1866   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1867 
1868   // gc copy/scan is disabled if prefetchw isn't supported, because
1869   // Prefetch::write emits an inlined prefetchw on Linux.
1870   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1871   // The used prefetcht0 instruction works for both amd64 and em64t.
1872 
1873   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1874     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1875   }
1876   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1877     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1878   }
1879 
1880   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1881      (cache_line_size > ContendedPaddingWidth))
1882      ContendedPaddingWidth = cache_line_size;
1883 
1884   // This machine allows unaligned memory accesses
1885   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1886     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1887   }
1888 
1889 #ifndef PRODUCT
1890   if (log_is_enabled(Info, os, cpu)) {
1891     LogStream ls(Log(os, cpu)::info());
1892     outputStream* log = &ls;
1893     log->print_cr("Logical CPUs per core: %u",
1894                   logical_processors_per_package());
1895     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1896     log->print("UseSSE=%d", UseSSE);
1897     if (UseAVX > 0) {
1898       log->print("  UseAVX=%d", UseAVX);
1899     }
1900     if (UseAES) {
1901       log->print("  UseAES=1");
1902     }
1903 #ifdef COMPILER2
1904     if (MaxVectorSize > 0) {
1905       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1906     }
1907 #endif
1908     log->cr();
1909     log->print("Allocation");
1910     if (AllocatePrefetchStyle <= 0) {
1911       log->print_cr(": no prefetching");
1912     } else {
1913       log->print(" prefetching: ");
1914       if (AllocatePrefetchInstr == 0) {
1915         log->print("PREFETCHNTA");
1916       } else if (AllocatePrefetchInstr == 1) {
1917         log->print("PREFETCHT0");
1918       } else if (AllocatePrefetchInstr == 2) {
1919         log->print("PREFETCHT2");
1920       } else if (AllocatePrefetchInstr == 3) {
1921         log->print("PREFETCHW");
1922       }
1923       if (AllocatePrefetchLines > 1) {
1924         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1925       } else {
1926         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1927       }
1928     }
1929 
1930     if (PrefetchCopyIntervalInBytes > 0) {
1931       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1932     }
1933     if (PrefetchScanIntervalInBytes > 0) {
1934       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1935     }
1936     if (ContendedPaddingWidth > 0) {
1937       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1938     }
1939   }
1940 #endif // !PRODUCT
1941   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1942       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1943   }
1944   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1945       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1946   }
1947 }
1948 
1949 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1950   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1951   if (vrt == XenHVM) {
1952     st->print_cr("Xen hardware-assisted virtualization detected");
1953   } else if (vrt == KVM) {
1954     st->print_cr("KVM virtualization detected");
1955   } else if (vrt == VMWare) {
1956     st->print_cr("VMWare virtualization detected");
1957     VirtualizationSupport::print_virtualization_info(st);
1958   } else if (vrt == HyperV) {
1959     st->print_cr("Hyper-V virtualization detected");
1960   } else if (vrt == HyperVRole) {
1961     st->print_cr("Hyper-V role detected");
1962   }
1963 }
1964 
1965 bool VM_Version::compute_has_intel_jcc_erratum() {
1966   if (!is_intel_family_core()) {
1967     // Only Intel CPUs are affected.
1968     return false;
1969   }
1970   // The following table of affected CPUs is based on the following document released by Intel:
1971   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1972   switch (_model) {
1973   case 0x8E:
1974     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1975     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1976     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1977     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1978     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1979     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1980     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1981     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1982     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1983     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1984   case 0x4E:
1985     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1986     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1987     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1988     return _stepping == 0x3;
1989   case 0x55:
1990     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1991     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1992     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1993     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1994     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1995     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1996     return _stepping == 0x4 || _stepping == 0x7;
1997   case 0x5E:
1998     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1999     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2000     return _stepping == 0x3;
2001   case 0x9E:
2002     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2003     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2004     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2005     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2006     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2007     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2008     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2009     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2010     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2011     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2012     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2013     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2014     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2015     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2016     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2017   case 0xA5:
2018     // Not in Intel documentation.
2019     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2020     return true;
2021   case 0xA6:
2022     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2023     return _stepping == 0x0;
2024   case 0xAE:
2025     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2026     return _stepping == 0xA;
2027   default:
2028     // If we are running on another intel machine not recognized in the table, we are okay.
2029     return false;
2030   }
2031 }
2032 
2033 // On Xen, the cpuid instruction returns
2034 //  eax / registers[0]: Version of Xen
2035 //  ebx / registers[1]: chars 'XenV'
2036 //  ecx / registers[2]: chars 'MMXe'
2037 //  edx / registers[3]: chars 'nVMM'
2038 //
2039 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2040 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2041 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2042 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2043 //
2044 // more information :
2045 // https://kb.vmware.com/s/article/1009458
2046 //
2047 void VM_Version::check_virtualizations() {
2048   uint32_t registers[4] = {0};
2049   char signature[13] = {0};
2050 
2051   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2052   // from 0x40000000 until 0x40010000.
2053   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2054   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2055     detect_virt_stub(leaf, registers);
2056     memcpy(signature, &registers[1], 12);
2057 
2058     if (strncmp("VMwareVMware", signature, 12) == 0) {
2059       Abstract_VM_Version::_detected_virtualization = VMWare;
2060       // check for extended metrics from guestlib
2061       VirtualizationSupport::initialize();
2062     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2063       Abstract_VM_Version::_detected_virtualization = HyperV;
2064 #ifdef _WINDOWS
2065       // CPUID leaf 0x40000007 is available to the root partition only.
2066       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2067       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2068       detect_virt_stub(0x40000007, registers);
2069       if ((registers[0] != 0x0) ||
2070           (registers[1] != 0x0) ||
2071           (registers[2] != 0x0) ||
2072           (registers[3] != 0x0)) {
2073         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2074       }
2075 #endif
2076     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2077       Abstract_VM_Version::_detected_virtualization = KVM;
2078     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2079       Abstract_VM_Version::_detected_virtualization = XenHVM;
2080     }
2081   }
2082 }
2083 
2084 #ifdef COMPILER2
2085 // Determine if it's running on Cascade Lake using default options.
2086 bool VM_Version::is_default_intel_cascade_lake() {
2087   return FLAG_IS_DEFAULT(UseAVX) &&
2088          FLAG_IS_DEFAULT(MaxVectorSize) &&
2089          UseAVX > 2 &&
2090          is_intel_cascade_lake();
2091 }
2092 #endif
2093 
2094 bool VM_Version::is_intel_cascade_lake() {
2095   return is_intel_skylake() && _stepping >= 5;
2096 }
2097 
2098 bool VM_Version::is_intel_darkmont() {
2099   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2100 }
2101 
2102 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2103 // for implementing the array copy and clear operations.
2104 // The Intel platforms that supports the serialize instruction
2105 // has improved implementation of 64-byte load/stores and so the default
2106 // threshold is set to 0 for these platforms.
2107 int VM_Version::avx3_threshold() {
2108   return (is_intel_server_family() &&
2109           supports_serialize() &&
2110           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2111 }
2112 
2113 void VM_Version::clear_apx_test_state() {
2114   clear_apx_test_state_stub();
2115 }
2116 
2117 static bool _vm_version_initialized = false;
2118 
2119 void VM_Version::initialize() {
2120   ResourceMark rm;
2121 
2122   // Making this stub must be FIRST use of assembler
2123   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2124   if (stub_blob == nullptr) {
2125     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2126   }
2127   CodeBuffer c(stub_blob);
2128   VM_Version_StubGenerator g(&c);
2129 
2130   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2131                                      g.generate_get_cpu_info());
2132   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2133                                      g.generate_detect_virt());
2134   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2135                                      g.clear_apx_test_state());
2136   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2137                                      g.generate_getCPUIDBrandString());
2138   get_processor_features();
2139 
2140   Assembler::precompute_instructions();
2141 
2142   if (VM_Version::supports_hv()) { // Supports hypervisor
2143     check_virtualizations();
2144   }
2145   _vm_version_initialized = true;
2146 }
2147 
2148 typedef enum {
2149    CPU_FAMILY_8086_8088  = 0,
2150    CPU_FAMILY_INTEL_286  = 2,
2151    CPU_FAMILY_INTEL_386  = 3,
2152    CPU_FAMILY_INTEL_486  = 4,
2153    CPU_FAMILY_PENTIUM    = 5,
2154    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2155    CPU_FAMILY_PENTIUM_4  = 0xF
2156 } FamilyFlag;
2157 
2158 typedef enum {
2159   RDTSCP_FLAG  = 0x08000000, // bit 27
2160   INTEL64_FLAG = 0x20000000  // bit 29
2161 } _featureExtendedEdxFlag;
2162 
2163 typedef enum {
2164    FPU_FLAG     = 0x00000001,
2165    VME_FLAG     = 0x00000002,
2166    DE_FLAG      = 0x00000004,
2167    PSE_FLAG     = 0x00000008,
2168    TSC_FLAG     = 0x00000010,
2169    MSR_FLAG     = 0x00000020,
2170    PAE_FLAG     = 0x00000040,
2171    MCE_FLAG     = 0x00000080,
2172    CX8_FLAG     = 0x00000100,
2173    APIC_FLAG    = 0x00000200,
2174    SEP_FLAG     = 0x00000800,
2175    MTRR_FLAG    = 0x00001000,
2176    PGE_FLAG     = 0x00002000,
2177    MCA_FLAG     = 0x00004000,
2178    CMOV_FLAG    = 0x00008000,
2179    PAT_FLAG     = 0x00010000,
2180    PSE36_FLAG   = 0x00020000,
2181    PSNUM_FLAG   = 0x00040000,
2182    CLFLUSH_FLAG = 0x00080000,
2183    DTS_FLAG     = 0x00200000,
2184    ACPI_FLAG    = 0x00400000,
2185    MMX_FLAG     = 0x00800000,
2186    FXSR_FLAG    = 0x01000000,
2187    SSE_FLAG     = 0x02000000,
2188    SSE2_FLAG    = 0x04000000,
2189    SS_FLAG      = 0x08000000,
2190    HTT_FLAG     = 0x10000000,
2191    TM_FLAG      = 0x20000000
2192 } FeatureEdxFlag;
2193 
2194 // VM_Version statics
2195 enum {
2196   ExtendedFamilyIdLength_INTEL = 16,
2197   ExtendedFamilyIdLength_AMD   = 24
2198 };
2199 
2200 const size_t VENDOR_LENGTH = 13;
2201 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2202 static char* _cpu_brand_string = nullptr;
2203 static int64_t _max_qualified_cpu_frequency = 0;
2204 
2205 static int _no_of_threads = 0;
2206 static int _no_of_cores = 0;
2207 
2208 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2209   "8086/8088",
2210   "",
2211   "286",
2212   "386",
2213   "486",
2214   "Pentium",
2215   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2216   "",
2217   "",
2218   "",
2219   "",
2220   "",
2221   "",
2222   "",
2223   "",
2224   "Pentium 4"
2225 };
2226 
2227 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2228   "",
2229   "",
2230   "",
2231   "",
2232   "5x86",
2233   "K5/K6",
2234   "Athlon/AthlonXP",
2235   "",
2236   "",
2237   "",
2238   "",
2239   "",
2240   "",
2241   "",
2242   "",
2243   "Opteron/Athlon64",
2244   "Opteron QC/Phenom",  // Barcelona et.al.
2245   "",
2246   "",
2247   "",
2248   "",
2249   "",
2250   "",
2251   "Zen"
2252 };
2253 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2254 // September 2013, Vol 3C Table 35-1
2255 const char* const _model_id_pentium_pro[] = {
2256   "",
2257   "Pentium Pro",
2258   "",
2259   "Pentium II model 3",
2260   "",
2261   "Pentium II model 5/Xeon/Celeron",
2262   "Celeron",
2263   "Pentium III/Pentium III Xeon",
2264   "Pentium III/Pentium III Xeon",
2265   "Pentium M model 9",    // Yonah
2266   "Pentium III, model A",
2267   "Pentium III, model B",
2268   "",
2269   "Pentium M model D",    // Dothan
2270   "",
2271   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2272   "",
2273   "",
2274   "",
2275   "",
2276   "",
2277   "",
2278   "Celeron",              // 0x16 Celeron 65nm
2279   "Core 2",               // 0x17 Penryn / Harpertown
2280   "",
2281   "",
2282   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2283   "Atom",                 // 0x1B Z5xx series Silverthorn
2284   "",
2285   "Core 2",               // 0x1D Dunnington (6-core)
2286   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2287   "",
2288   "",
2289   "",
2290   "",
2291   "",
2292   "",
2293   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2294   "",
2295   "",
2296   "",                     // 0x28
2297   "",
2298   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2299   "",
2300   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2301   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2302   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2303   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2304   "",
2305   "",
2306   "",
2307   "",
2308   "",
2309   "",
2310   "",
2311   "",
2312   "",
2313   "",
2314   "Ivy Bridge",           // 0x3a
2315   "",
2316   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2317   "",                     // 0x3d "Next Generation Intel Core Processor"
2318   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2319   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2320   "",
2321   "",
2322   "",
2323   "",
2324   "",
2325   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2326   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2327   nullptr
2328 };
2329 
2330 /* Brand ID is for back compatibility
2331  * Newer CPUs uses the extended brand string */
2332 const char* const _brand_id[] = {
2333   "",
2334   "Celeron processor",
2335   "Pentium III processor",
2336   "Intel Pentium III Xeon processor",
2337   "",
2338   "",
2339   "",
2340   "",
2341   "Intel Pentium 4 processor",
2342   nullptr
2343 };
2344 
2345 
2346 const char* const _feature_edx_id[] = {
2347   "On-Chip FPU",
2348   "Virtual Mode Extensions",
2349   "Debugging Extensions",
2350   "Page Size Extensions",
2351   "Time Stamp Counter",
2352   "Model Specific Registers",
2353   "Physical Address Extension",
2354   "Machine Check Exceptions",
2355   "CMPXCHG8B Instruction",
2356   "On-Chip APIC",
2357   "",
2358   "Fast System Call",
2359   "Memory Type Range Registers",
2360   "Page Global Enable",
2361   "Machine Check Architecture",
2362   "Conditional Mov Instruction",
2363   "Page Attribute Table",
2364   "36-bit Page Size Extension",
2365   "Processor Serial Number",
2366   "CLFLUSH Instruction",
2367   "",
2368   "Debug Trace Store feature",
2369   "ACPI registers in MSR space",
2370   "Intel Architecture MMX Technology",
2371   "Fast Float Point Save and Restore",
2372   "Streaming SIMD extensions",
2373   "Streaming SIMD extensions 2",
2374   "Self-Snoop",
2375   "Hyper Threading",
2376   "Thermal Monitor",
2377   "",
2378   "Pending Break Enable"
2379 };
2380 
2381 const char* const _feature_extended_edx_id[] = {
2382   "",
2383   "",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "",
2392   "",
2393   "SYSCALL/SYSRET",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "",
2399   "",
2400   "",
2401   "",
2402   "Execute Disable Bit",
2403   "",
2404   "",
2405   "",
2406   "",
2407   "",
2408   "",
2409   "RDTSCP",
2410   "",
2411   "Intel 64 Architecture",
2412   "",
2413   ""
2414 };
2415 
2416 const char* const _feature_ecx_id[] = {
2417   "Streaming SIMD Extensions 3",
2418   "PCLMULQDQ",
2419   "64-bit DS Area",
2420   "MONITOR/MWAIT instructions",
2421   "CPL Qualified Debug Store",
2422   "Virtual Machine Extensions",
2423   "Safer Mode Extensions",
2424   "Enhanced Intel SpeedStep technology",
2425   "Thermal Monitor 2",
2426   "Supplemental Streaming SIMD Extensions 3",
2427   "L1 Context ID",
2428   "",
2429   "Fused Multiply-Add",
2430   "CMPXCHG16B",
2431   "xTPR Update Control",
2432   "Perfmon and Debug Capability",
2433   "",
2434   "Process-context identifiers",
2435   "Direct Cache Access",
2436   "Streaming SIMD extensions 4.1",
2437   "Streaming SIMD extensions 4.2",
2438   "x2APIC",
2439   "MOVBE",
2440   "Popcount instruction",
2441   "TSC-Deadline",
2442   "AESNI",
2443   "XSAVE",
2444   "OSXSAVE",
2445   "AVX",
2446   "F16C",
2447   "RDRAND",
2448   ""
2449 };
2450 
2451 const char* const _feature_extended_ecx_id[] = {
2452   "LAHF/SAHF instruction support",
2453   "Core multi-processor legacy mode",
2454   "",
2455   "",
2456   "",
2457   "Advanced Bit Manipulations: LZCNT",
2458   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2459   "Misaligned SSE mode",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "",
2483   ""
2484 };
2485 
2486 const char* VM_Version::cpu_model_description(void) {
2487   uint32_t cpu_family = extended_cpu_family();
2488   uint32_t cpu_model = extended_cpu_model();
2489   const char* model = nullptr;
2490 
2491   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2492     for (uint32_t i = 0; i <= cpu_model; i++) {
2493       model = _model_id_pentium_pro[i];
2494       if (model == nullptr) {
2495         break;
2496       }
2497     }
2498   }
2499   return model;
2500 }
2501 
2502 const char* VM_Version::cpu_brand_string(void) {
2503   if (_cpu_brand_string == nullptr) {
2504     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2505     if (nullptr == _cpu_brand_string) {
2506       return nullptr;
2507     }
2508     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2509     if (ret_val != OS_OK) {
2510       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2511       _cpu_brand_string = nullptr;
2512     }
2513   }
2514   return _cpu_brand_string;
2515 }
2516 
2517 const char* VM_Version::cpu_brand(void) {
2518   const char*  brand  = nullptr;
2519 
2520   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2521     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2522     brand = _brand_id[0];
2523     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2524       brand = _brand_id[i];
2525     }
2526   }
2527   return brand;
2528 }
2529 
2530 bool VM_Version::cpu_is_em64t(void) {
2531   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2532 }
2533 
2534 bool VM_Version::is_netburst(void) {
2535   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2536 }
2537 
2538 bool VM_Version::supports_tscinv_ext(void) {
2539   if (!supports_tscinv_bit()) {
2540     return false;
2541   }
2542 
2543   if (is_intel()) {
2544     return true;
2545   }
2546 
2547   if (is_amd()) {
2548     return !is_amd_Barcelona();
2549   }
2550 
2551   if (is_hygon()) {
2552     return true;
2553   }
2554 
2555   return false;
2556 }
2557 
2558 void VM_Version::resolve_cpu_information_details(void) {
2559 
2560   // in future we want to base this information on proper cpu
2561   // and cache topology enumeration such as:
2562   // Intel 64 Architecture Processor Topology Enumeration
2563   // which supports system cpu and cache topology enumeration
2564   // either using 2xAPICIDs or initial APICIDs
2565 
2566   // currently only rough cpu information estimates
2567   // which will not necessarily reflect the exact configuration of the system
2568 
2569   // this is the number of logical hardware threads
2570   // visible to the operating system
2571   _no_of_threads = os::processor_count();
2572 
2573   // find out number of threads per cpu package
2574   int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2575   if (threads_per_package == 0) {
2576     // Fallback code to avoid div by zero in subsequent code.
2577     // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2578     threads_per_package = threads_per_core() * cores_per_cpu();
2579   }
2580 
2581   // use amount of threads visible to the process in order to guess number of sockets
2582   _no_of_sockets = _no_of_threads / threads_per_package;
2583 
2584   // process might only see a subset of the total number of threads
2585   // from a single processor package. Virtualization/resource management for example.
2586   // If so then just write a hard 1 as num of pkgs.
2587   if (0 == _no_of_sockets) {
2588     _no_of_sockets = 1;
2589   }
2590 
2591   // estimate the number of cores
2592   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2593 }
2594 
2595 
2596 const char* VM_Version::cpu_family_description(void) {
2597   int cpu_family_id = extended_cpu_family();
2598   if (is_amd()) {
2599     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2600       return _family_id_amd[cpu_family_id];
2601     }
2602   }
2603   if (is_intel()) {
2604     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2605       return cpu_model_description();
2606     }
2607     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2608       return _family_id_intel[cpu_family_id];
2609     }
2610   }
2611   if (is_hygon()) {
2612     return "Dhyana";
2613   }
2614   return "Unknown x86";
2615 }
2616 
2617 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2618   assert(buf != nullptr, "buffer is null!");
2619   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2620 
2621   const char* cpu_type = nullptr;
2622   const char* x64 = nullptr;
2623 
2624   if (is_intel()) {
2625     cpu_type = "Intel";
2626     x64 = cpu_is_em64t() ? " Intel64" : "";
2627   } else if (is_amd()) {
2628     cpu_type = "AMD";
2629     x64 = cpu_is_em64t() ? " AMD64" : "";
2630   } else if (is_hygon()) {
2631     cpu_type = "Hygon";
2632     x64 = cpu_is_em64t() ? " AMD64" : "";
2633   } else {
2634     cpu_type = "Unknown x86";
2635     x64 = cpu_is_em64t() ? " x86_64" : "";
2636   }
2637 
2638   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2639     cpu_type,
2640     cpu_family_description(),
2641     supports_ht() ? " (HT)" : "",
2642     supports_sse3() ? " SSE3" : "",
2643     supports_ssse3() ? " SSSE3" : "",
2644     supports_sse4_1() ? " SSE4.1" : "",
2645     supports_sse4_2() ? " SSE4.2" : "",
2646     supports_sse4a() ? " SSE4A" : "",
2647     is_netburst() ? " Netburst" : "",
2648     is_intel_family_core() ? " Core" : "",
2649     x64);
2650 
2651   return OS_OK;
2652 }
2653 
2654 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2655   assert(buf != nullptr, "buffer is null!");
2656   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2657   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2658 
2659   // invoke newly generated asm code to fetch CPU Brand String
2660   getCPUIDBrandString_stub(&_cpuid_info);
2661 
2662   // fetch results into buffer
2663   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2664   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2665   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2666   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2667   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2668   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2669   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2670   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2671   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2672   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2673   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2674   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2675 
2676   return OS_OK;
2677 }
2678 
2679 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2680   guarantee(buf != nullptr, "buffer is null!");
2681   guarantee(buf_len > 0, "buffer len not enough!");
2682 
2683   unsigned int flag = 0;
2684   unsigned int fi = 0;
2685   size_t       written = 0;
2686   const char*  prefix = "";
2687 
2688 #define WRITE_TO_BUF(string)                                                          \
2689   {                                                                                   \
2690     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2691     if (res < 0) {                                                                    \
2692       return buf_len - 1;                                                             \
2693     }                                                                                 \
2694     written += res;                                                                   \
2695     if (prefix[0] == '\0') {                                                          \
2696       prefix = ", ";                                                                  \
2697     }                                                                                 \
2698   }
2699 
2700   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2701     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2702       continue; /* no hyperthreading */
2703     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2704       continue; /* no fast system call */
2705     }
2706     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2707       WRITE_TO_BUF(_feature_edx_id[fi]);
2708     }
2709   }
2710 
2711   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2712     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2713       WRITE_TO_BUF(_feature_ecx_id[fi]);
2714     }
2715   }
2716 
2717   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2718     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2719       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2720     }
2721   }
2722 
2723   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2724     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2725       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2726     }
2727   }
2728 
2729   if (supports_tscinv_bit()) {
2730       WRITE_TO_BUF("Invariant TSC");
2731   }
2732 
2733   if (supports_hybrid()) {
2734       WRITE_TO_BUF("Hybrid Architecture");
2735   }
2736 
2737   return written;
2738 }
2739 
2740 /**
2741  * Write a detailed description of the cpu to a given buffer, including
2742  * feature set.
2743  */
2744 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2745   assert(buf != nullptr, "buffer is null!");
2746   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2747 
2748   static const char* unknown = "<unknown>";
2749   char               vendor_id[VENDOR_LENGTH];
2750   const char*        family = nullptr;
2751   const char*        model = nullptr;
2752   const char*        brand = nullptr;
2753   int                outputLen = 0;
2754 
2755   family = cpu_family_description();
2756   if (family == nullptr) {
2757     family = unknown;
2758   }
2759 
2760   model = cpu_model_description();
2761   if (model == nullptr) {
2762     model = unknown;
2763   }
2764 
2765   brand = cpu_brand_string();
2766 
2767   if (brand == nullptr) {
2768     brand = cpu_brand();
2769     if (brand == nullptr) {
2770       brand = unknown;
2771     }
2772   }
2773 
2774   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2775   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2776   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2777   vendor_id[VENDOR_LENGTH-1] = '\0';
2778 
2779   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2780     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2781     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2782     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2783     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2784     "Supports: ",
2785     brand,
2786     vendor_id,
2787     family,
2788     extended_cpu_family(),
2789     model,
2790     extended_cpu_model(),
2791     cpu_stepping(),
2792     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2793     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2794     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2795     _cpuid_info.std_cpuid1_eax.value,
2796     _cpuid_info.std_cpuid1_ebx.value,
2797     _cpuid_info.std_cpuid1_ecx.value,
2798     _cpuid_info.std_cpuid1_edx.value,
2799     _cpuid_info.ext_cpuid1_eax,
2800     _cpuid_info.ext_cpuid1_ebx,
2801     _cpuid_info.ext_cpuid1_ecx,
2802     _cpuid_info.ext_cpuid1_edx);
2803 
2804   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2805     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2806     return OS_ERR;
2807   }
2808 
2809   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2810 
2811   return OS_OK;
2812 }
2813 
2814 
2815 // Fill in Abstract_VM_Version statics
2816 void VM_Version::initialize_cpu_information() {
2817   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2818   assert(!_initialized, "shouldn't be initialized yet");
2819   resolve_cpu_information_details();
2820 
2821   // initialize cpu_name and cpu_desc
2822   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2823   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2824   _initialized = true;
2825 }
2826 
2827 /**
2828  *  For information about extracting the frequency from the cpu brand string, please see:
2829  *
2830  *    Intel Processor Identification and the CPUID Instruction
2831  *    Application Note 485
2832  *    May 2012
2833  *
2834  * The return value is the frequency in Hz.
2835  */
2836 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2837   const char* const brand_string = cpu_brand_string();
2838   if (brand_string == nullptr) {
2839     return 0;
2840   }
2841   const int64_t MEGA = 1000000;
2842   int64_t multiplier = 0;
2843   int64_t frequency = 0;
2844   uint8_t idx = 0;
2845   // The brand string buffer is at most 48 bytes.
2846   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2847   for (; idx < 48-2; ++idx) {
2848     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2849     // Search brand string for "yHz" where y is M, G, or T.
2850     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2851       if (brand_string[idx] == 'M') {
2852         multiplier = MEGA;
2853       } else if (brand_string[idx] == 'G') {
2854         multiplier = MEGA * 1000;
2855       } else if (brand_string[idx] == 'T') {
2856         multiplier = MEGA * MEGA;
2857       }
2858       break;
2859     }
2860   }
2861   if (multiplier > 0) {
2862     // Compute frequency (in Hz) from brand string.
2863     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2864       frequency =  (brand_string[idx-4] - '0') * multiplier;
2865       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2866       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2867     } else { // format is "xxxx"
2868       frequency =  (brand_string[idx-4] - '0') * 1000;
2869       frequency += (brand_string[idx-3] - '0') * 100;
2870       frequency += (brand_string[idx-2] - '0') * 10;
2871       frequency += (brand_string[idx-1] - '0');
2872       frequency *= multiplier;
2873     }
2874   }
2875   return frequency;
2876 }
2877 
2878 
2879 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2880   if (_max_qualified_cpu_frequency == 0) {
2881     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2882   }
2883   return _max_qualified_cpu_frequency;
2884 }
2885 
2886 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2887   VM_Features vm_features;
2888   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2889     vm_features.set_feature(CPU_CX8);
2890   if (std_cpuid1_edx.bits.cmov != 0)
2891     vm_features.set_feature(CPU_CMOV);
2892   if (std_cpuid1_edx.bits.clflush != 0)
2893     vm_features.set_feature(CPU_FLUSH);
2894   // clflush should always be available on x86_64
2895   // if not we are in real trouble because we rely on it
2896   // to flush the code cache.
2897   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2898   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2899       ext_cpuid1_edx.bits.fxsr != 0))
2900     vm_features.set_feature(CPU_FXSR);
2901   // HT flag is set for multi-core processors also.
2902   if (threads_per_core() > 1)
2903     vm_features.set_feature(CPU_HT);
2904   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2905       ext_cpuid1_edx.bits.mmx != 0))
2906     vm_features.set_feature(CPU_MMX);
2907   if (std_cpuid1_edx.bits.sse != 0)
2908     vm_features.set_feature(CPU_SSE);
2909   if (std_cpuid1_edx.bits.sse2 != 0)
2910     vm_features.set_feature(CPU_SSE2);
2911   if (std_cpuid1_ecx.bits.sse3 != 0)
2912     vm_features.set_feature(CPU_SSE3);
2913   if (std_cpuid1_ecx.bits.ssse3 != 0)
2914     vm_features.set_feature(CPU_SSSE3);
2915   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2916     vm_features.set_feature(CPU_SSE4_1);
2917   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2918     vm_features.set_feature(CPU_SSE4_2);
2919   if (std_cpuid1_ecx.bits.popcnt != 0)
2920     vm_features.set_feature(CPU_POPCNT);
2921   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2922       xem_xcr0_eax.bits.apx_f != 0 &&
2923       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2924     vm_features.set_feature(CPU_APX_F);
2925   }
2926   if (std_cpuid1_ecx.bits.avx != 0 &&
2927       std_cpuid1_ecx.bits.osxsave != 0 &&
2928       xem_xcr0_eax.bits.sse != 0 &&
2929       xem_xcr0_eax.bits.ymm != 0) {
2930     vm_features.set_feature(CPU_AVX);
2931     vm_features.set_feature(CPU_VZEROUPPER);
2932     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2933       vm_features.set_feature(CPU_SHA512);
2934     if (std_cpuid1_ecx.bits.f16c != 0)
2935       vm_features.set_feature(CPU_F16C);
2936     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2937       vm_features.set_feature(CPU_AVX2);
2938       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2939         vm_features.set_feature(CPU_AVX_IFMA);
2940     }
2941     if (sef_cpuid7_ecx.bits.gfni != 0)
2942         vm_features.set_feature(CPU_GFNI);
2943     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2944         xem_xcr0_eax.bits.opmask != 0 &&
2945         xem_xcr0_eax.bits.zmm512 != 0 &&
2946         xem_xcr0_eax.bits.zmm32 != 0) {
2947       vm_features.set_feature(CPU_AVX512F);
2948       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2949         vm_features.set_feature(CPU_AVX512CD);
2950       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2951         vm_features.set_feature(CPU_AVX512DQ);
2952       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2953         vm_features.set_feature(CPU_AVX512_IFMA);
2954       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2955         vm_features.set_feature(CPU_AVX512PF);
2956       if (sef_cpuid7_ebx.bits.avx512er != 0)
2957         vm_features.set_feature(CPU_AVX512ER);
2958       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2959         vm_features.set_feature(CPU_AVX512BW);
2960       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2961         vm_features.set_feature(CPU_AVX512VL);
2962       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2963         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2964       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2965         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2966       if (sef_cpuid7_ecx.bits.vaes != 0)
2967         vm_features.set_feature(CPU_AVX512_VAES);
2968       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2969         vm_features.set_feature(CPU_AVX512_VNNI);
2970       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2971         vm_features.set_feature(CPU_AVX512_BITALG);
2972       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2973         vm_features.set_feature(CPU_AVX512_VBMI);
2974       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2975         vm_features.set_feature(CPU_AVX512_VBMI2);
2976     }
2977     if (is_intel()) {
2978       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2979           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2980           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2981           xem_xcr0_eax.bits.opmask != 0 &&
2982           xem_xcr0_eax.bits.zmm512 != 0 &&
2983           xem_xcr0_eax.bits.zmm32 != 0) {
2984         vm_features.set_feature(CPU_AVX10_1);
2985         vm_features.set_feature(CPU_AVX512F);
2986         vm_features.set_feature(CPU_AVX512CD);
2987         vm_features.set_feature(CPU_AVX512DQ);
2988         vm_features.set_feature(CPU_AVX512PF);
2989         vm_features.set_feature(CPU_AVX512ER);
2990         vm_features.set_feature(CPU_AVX512BW);
2991         vm_features.set_feature(CPU_AVX512VL);
2992         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2993         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2994         vm_features.set_feature(CPU_AVX512_VAES);
2995         vm_features.set_feature(CPU_AVX512_VNNI);
2996         vm_features.set_feature(CPU_AVX512_BITALG);
2997         vm_features.set_feature(CPU_AVX512_VBMI);
2998         vm_features.set_feature(CPU_AVX512_VBMI2);
2999         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3000           vm_features.set_feature(CPU_AVX10_2);
3001         }
3002       }
3003     }
3004   }
3005 
3006   if (std_cpuid1_ecx.bits.hv != 0)
3007     vm_features.set_feature(CPU_HV);
3008   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3009     vm_features.set_feature(CPU_BMI1);
3010   if (std_cpuid1_edx.bits.tsc != 0)
3011     vm_features.set_feature(CPU_TSC);
3012   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3013     vm_features.set_feature(CPU_TSCINV_BIT);
3014   if (std_cpuid1_ecx.bits.aes != 0)
3015     vm_features.set_feature(CPU_AES);
3016   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3017     vm_features.set_feature(CPU_LZCNT);
3018   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3019     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3020   if (sef_cpuid7_ebx.bits.erms != 0)
3021     vm_features.set_feature(CPU_ERMS);
3022   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3023     vm_features.set_feature(CPU_FSRM);
3024   if (std_cpuid1_ecx.bits.clmul != 0)
3025     vm_features.set_feature(CPU_CLMUL);
3026   if (sef_cpuid7_ebx.bits.rtm != 0)
3027     vm_features.set_feature(CPU_RTM);
3028   if (sef_cpuid7_ebx.bits.adx != 0)
3029      vm_features.set_feature(CPU_ADX);
3030   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3031     vm_features.set_feature(CPU_BMI2);
3032   if (sef_cpuid7_ebx.bits.sha != 0)
3033     vm_features.set_feature(CPU_SHA);
3034   if (std_cpuid1_ecx.bits.fma != 0)
3035     vm_features.set_feature(CPU_FMA);
3036   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3037     vm_features.set_feature(CPU_FLUSHOPT);
3038   if (sef_cpuid7_ebx.bits.clwb != 0)
3039     vm_features.set_feature(CPU_CLWB);
3040   if (ext_cpuid1_edx.bits.rdtscp != 0)
3041     vm_features.set_feature(CPU_RDTSCP);
3042   if (sef_cpuid7_ecx.bits.rdpid != 0)
3043     vm_features.set_feature(CPU_RDPID);
3044 
3045   // AMD|Hygon additional features.
3046   if (is_amd_family()) {
3047     // PREFETCHW was checked above, check TDNOW here.
3048     if ((ext_cpuid1_edx.bits.tdnow != 0))
3049       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3050     if (ext_cpuid1_ecx.bits.sse4a != 0)
3051       vm_features.set_feature(CPU_SSE4A);
3052   }
3053 
3054   // Intel additional features.
3055   if (is_intel()) {
3056     if (sef_cpuid7_edx.bits.serialize != 0)
3057       vm_features.set_feature(CPU_SERIALIZE);
3058     if (sef_cpuid7_edx.bits.hybrid != 0)
3059       vm_features.set_feature(CPU_HYBRID);
3060     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3061       vm_features.set_feature(CPU_AVX512_FP16);
3062   }
3063 
3064   // ZX additional features.
3065   if (is_zx()) {
3066     // We do not know if these are supported by ZX, so we cannot trust
3067     // common CPUID bit for them.
3068     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3069     vm_features.clear_feature(CPU_CLWB);
3070   }
3071 
3072   // Protection key features.
3073   if (sef_cpuid7_ecx.bits.pku != 0) {
3074     vm_features.set_feature(CPU_PKU);
3075   }
3076   if (sef_cpuid7_ecx.bits.ospke != 0) {
3077     vm_features.set_feature(CPU_OSPKE);
3078   }
3079 
3080   // Control flow enforcement (CET) features.
3081   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3082     vm_features.set_feature(CPU_CET_SS);
3083   }
3084   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3085     vm_features.set_feature(CPU_CET_IBT);
3086   }
3087 
3088   // Composite features.
3089   if (supports_tscinv_bit() &&
3090       ((is_amd_family() && !is_amd_Barcelona()) ||
3091        is_intel_tsc_synched_at_init())) {
3092     vm_features.set_feature(CPU_TSCINV);
3093   }
3094   return vm_features;
3095 }
3096 
3097 bool VM_Version::os_supports_avx_vectors() {
3098   bool retVal = false;
3099   int nreg = 4;
3100   if (supports_evex()) {
3101     // Verify that OS save/restore all bits of EVEX registers
3102     // during signal processing.
3103     retVal = true;
3104     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3105       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3106         retVal = false;
3107         break;
3108       }
3109     }
3110   } else if (supports_avx()) {
3111     // Verify that OS save/restore all bits of AVX registers
3112     // during signal processing.
3113     retVal = true;
3114     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3115       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3116         retVal = false;
3117         break;
3118       }
3119     }
3120     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3121     if (retVal == false) {
3122       // Verify that OS save/restore all bits of EVEX registers
3123       // during signal processing.
3124       retVal = true;
3125       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3126         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3127           retVal = false;
3128           break;
3129         }
3130       }
3131     }
3132   }
3133   return retVal;
3134 }
3135 
3136 bool VM_Version::os_supports_apx_egprs() {
3137   if (!supports_apx_f()) {
3138     return false;
3139   }
3140   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3141       _cpuid_info.apx_save[1] != egpr_test_value()) {
3142     return false;
3143   }
3144   return true;
3145 }
3146 
3147 uint VM_Version::cores_per_cpu() {
3148   uint result = 1;
3149   if (is_intel()) {
3150     bool supports_topology = supports_processor_topology();
3151     if (supports_topology) {
3152       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3153                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3154     }
3155     if (!supports_topology || result == 0) {
3156       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3157     }
3158   } else if (is_amd_family()) {
3159     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3160     if (cpu_family() >= 0x17) { // Zen or later
3161       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3162     }
3163   } else if (is_zx()) {
3164     bool supports_topology = supports_processor_topology();
3165     if (supports_topology) {
3166       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3167                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3168     }
3169     if (!supports_topology || result == 0) {
3170       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3171     }
3172   }
3173   return result;
3174 }
3175 
3176 uint VM_Version::threads_per_core() {
3177   uint result = 1;
3178   if (is_intel() && supports_processor_topology()) {
3179     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3180   } else if (is_zx() && supports_processor_topology()) {
3181     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3182   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3183     if (cpu_family() >= 0x17) {
3184       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3185     } else {
3186       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3187                  cores_per_cpu();
3188     }
3189   }
3190   return (result == 0 ? 1 : result);
3191 }
3192 
3193 uint VM_Version::L1_line_size() {
3194   uint result = 0;
3195   if (is_intel()) {
3196     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3197   } else if (is_amd_family()) {
3198     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3199   } else if (is_zx()) {
3200     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3201   }
3202   if (result < 32) // not defined ?
3203     result = 32;   // 32 bytes by default on x86 and other x64
3204   return result;
3205 }
3206 
3207 bool VM_Version::is_intel_tsc_synched_at_init() {
3208   if (is_intel_family_core()) {
3209     uint32_t ext_model = extended_cpu_model();
3210     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3211         ext_model == CPU_MODEL_WESTMERE_EP    ||
3212         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3213         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3214       // <= 2-socket invariant tsc support. EX versions are usually used
3215       // in > 2-socket systems and likely don't synchronize tscs at
3216       // initialization.
3217       // Code that uses tsc values must be prepared for them to arbitrarily
3218       // jump forward or backward.
3219       return true;
3220     }
3221   }
3222   return false;
3223 }
3224 
3225 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3226   // Hardware prefetching (distance/size in bytes):
3227   // Pentium 3 -  64 /  32
3228   // Pentium 4 - 256 / 128
3229   // Athlon    -  64 /  32 ????
3230   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3231   // Core      - 128 /  64
3232   //
3233   // Software prefetching (distance in bytes / instruction with best score):
3234   // Pentium 3 - 128 / prefetchnta
3235   // Pentium 4 - 512 / prefetchnta
3236   // Athlon    - 128 / prefetchnta
3237   // Opteron   - 256 / prefetchnta
3238   // Core      - 256 / prefetchnta
3239   // It will be used only when AllocatePrefetchStyle > 0
3240 
3241   if (is_amd_family()) { // AMD | Hygon
3242     if (supports_sse2()) {
3243       return 256; // Opteron
3244     } else {
3245       return 128; // Athlon
3246     }
3247   } else { // Intel
3248     if (supports_sse3() && is_intel_server_family()) {
3249       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3250         return 192;
3251       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3252         return 384;
3253       }
3254     }
3255     if (supports_sse2()) {
3256       if (is_intel_server_family()) {
3257         return 256; // Pentium M, Core, Core2
3258       } else {
3259         return 512; // Pentium 4
3260       }
3261     } else {
3262       return 128; // Pentium 3 (and all other old CPUs)
3263     }
3264   }
3265 }
3266 
3267 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3268   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3269   switch (id) {
3270   case vmIntrinsics::_floatToFloat16:
3271   case vmIntrinsics::_float16ToFloat:
3272     if (!supports_float16()) {
3273       return false;
3274     }
3275     break;
3276   default:
3277     break;
3278   }
3279   return true;
3280 }
3281 
3282 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3283   int i = 0;
3284   ss.join([&]() {
3285     while (i < MAX_CPU_FEATURES) {
3286       if (_features.supports_feature((VM_Version::Feature_Flag)i)) {
3287         return _features_names[i++];
3288       }
3289       i += 1;
3290     }
3291     return (const char*)nullptr;
3292   }, ", ");
3293 }