1 /*
   2  * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
  45 int VM_Version::_cpu;
  46 int VM_Version::_model;
  47 int VM_Version::_stepping;
  48 bool VM_Version::_has_intel_jcc_erratum;
  49 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  50 
  51 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  52 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  53 #undef DECLARE_CPU_FEATURE_NAME
  54 
  55 // Address of instruction which causes SEGV
  56 address VM_Version::_cpuinfo_segv_addr = nullptr;
  57 // Address of instruction after the one which causes SEGV
  58 address VM_Version::_cpuinfo_cont_addr = nullptr;
  59 // Address of instruction which causes APX specific SEGV
  60 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
  61 // Address of instruction after the one which causes APX specific SEGV
  62 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
  63 
  64 static BufferBlob* stub_blob;
  65 static const int stub_size = 2550;
  66 
  67 int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;
  68 
  69 VM_Version::VM_Features VM_Version::_features;
  70 VM_Version::VM_Features VM_Version::_cpu_features;
  71 
  72 extern "C" {
  73   typedef void (*get_cpu_info_stub_t)(void*);
  74   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  75   typedef void (*clear_apx_test_state_t)(void);
  76   typedef void (*getCPUIDBrandString_stub_t)(void*);
  77 }
  78 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  79 static detect_virt_stub_t detect_virt_stub = nullptr;
  80 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
  81 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
  82 
  83 bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64;
  // if not, we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of the generation of the code cache flush routine. This happens
  // under Universe::init, before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So we give the caller a free pass while Universe
  // initialization is still in progress.
  93   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  94   return true;
  95 }
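// A minimal sketch (illustrative only, plain GCC/Clang builtins rather than
// HotSpot's own flush path) of a clflush-based flush over a code range,
// assuming the 64-byte line size verified in get_processor_features():
//
//   static void flush_range(const void* start, size_t len) {
//     const uintptr_t line = 64;                     // ICache::line_size
//     uintptr_t p = (uintptr_t)start & ~(line - 1);  // align down to a line
//     for (; p < (uintptr_t)start + len; p += line) {
//       __builtin_ia32_clflush((const void*)p);      // flush one cache line
//     }
//     __builtin_ia32_mfence();                       // order the flushes
//   }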
  96 
  97 #define CPUID_STANDARD_FN   0x0
  98 #define CPUID_STANDARD_FN_1 0x1
  99 #define CPUID_STANDARD_FN_4 0x4
 100 #define CPUID_STANDARD_FN_B 0xb
 101 
 102 #define CPUID_EXTENDED_FN   0x80000000
 103 #define CPUID_EXTENDED_FN_1 0x80000001
 104 #define CPUID_EXTENDED_FN_2 0x80000002
 105 #define CPUID_EXTENDED_FN_3 0x80000003
 106 #define CPUID_EXTENDED_FN_4 0x80000004
 107 #define CPUID_EXTENDED_FN_7 0x80000007
 108 #define CPUID_EXTENDED_FN_8 0x80000008
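// For reference, these leaves can be queried from ordinary C++ with the
// compiler's cpuid support; a minimal sketch using GCC/Clang's <cpuid.h>
// (the VM instead uses the generated stubs below, which also work during
// early bootstrap):
//
//   #include <cpuid.h>
//   unsigned eax, ebx, ecx, edx;
//   if (__get_cpuid(CPUID_STANDARD_FN_1, &eax, &ebx, &ecx, &edx)) {
//     unsigned family = (eax >> 8) & 0xf;   // base family, EAX[11:8]
//     unsigned model  = (eax >> 4) & 0xf;   // base model,  EAX[7:4]
//   }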
 109 
 110 class VM_Version_StubGenerator: public StubCodeGenerator {
 111  public:
 112 
 113   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 114 
 115   address clear_apx_test_state() {
 116 #   define __ _masm->
 117     address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31 during signal
    // handling guarantees that any preserved register values observed after signal handling
    // were re-instated by the operating system, not merely left unmodified.
 121 
 122     bool save_apx = UseAPX;
 123     VM_Version::set_apx_cpuFeatures();
 124     UseAPX = true;
 125     // EGPR state save/restoration.
 126     __ mov64(r16, 0L);
 127     __ mov64(r31, 0L);
 128     UseAPX = save_apx;
 129     VM_Version::clean_cpuFeatures();
 130     __ ret(0);
 131     return start;
 132   }
 133 
 134   address generate_get_cpu_info() {
 135     // Flags to test CPU type.
 136     const uint32_t HS_EFL_AC = 0x40000;
 137     const uint32_t HS_EFL_ID = 0x200000;
 138     // Values for when we don't have a CPUID instruction.
 139     const int      CPU_FAMILY_SHIFT = 8;
 140     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 141     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 142     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 143 
 144     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
 145     Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
 146     Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
 147     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 148 
 149     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 150 #   define __ _masm->
 151 
 152     address start = __ pc();
 153 
 154     //
 155     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 156     //
    // On Windows, rcx and rdx are the first and second argument registers.
 158 
 159     __ push(rbp);
 160     __ mov(rbp, c_rarg0); // cpuid_info address
 161     __ push(rbx);
 162     __ push(rsi);
    __ pushf();          // preserve flags (rbx and rsi were saved above)
 164     __ pop(rax);
 165     __ push(rax);
 166     __ mov(rcx, rax);
 167     //
 168     // if we are unable to change the AC flag, we have a 386
 169     //
 170     __ xorl(rax, HS_EFL_AC);
 171     __ push(rax);
 172     __ popf();
 173     __ pushf();
 174     __ pop(rax);
 175     __ cmpptr(rax, rcx);
 176     __ jccb(Assembler::notEqual, detect_486);
 177 
 178     __ movl(rax, CPU_FAMILY_386);
 179     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 180     __ jmp(done);
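    // The same probe as a plain C++ sketch (illustrative only): toggle an
    // EFLAGS bit via pushf/popf and test whether the change sticks. If
    // EFLAGS.AC (bit 18, HS_EFL_AC) cannot be changed we have a 386; if
    // EFLAGS.ID (bit 21, HS_EFL_ID) cannot be changed, CPUID is missing.
    //
    //   static bool eflags_bit_togglable(unsigned long mask) {
    //     unsigned long before, after;
    //     __asm__ volatile("pushf; pop %0" : "=r"(before));
    //     __asm__ volatile("push %1; popf; pushf; pop %0"
    //                      : "=r"(after) : "r"(before ^ mask) : "cc");
    //     return ((before ^ after) & mask) != 0;
    //   }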
 181 
 182     //
 183     // If we are unable to change the ID flag, we have a 486 which does
 184     // not support the "cpuid" instruction.
 185     //
 186     __ bind(detect_486);
 187     __ mov(rax, rcx);
 188     __ xorl(rax, HS_EFL_ID);
 189     __ push(rax);
 190     __ popf();
 191     __ pushf();
 192     __ pop(rax);
 193     __ cmpptr(rcx, rax);
 194     __ jccb(Assembler::notEqual, detect_586);
 195 
 196     __ bind(cpu486);
 197     __ movl(rax, CPU_FAMILY_486);
 198     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 199     __ jmp(done);
 200 
 201     //
 202     // At this point, we have a chip which supports the "cpuid" instruction
 203     //
 204     __ bind(detect_586);
 205     __ xorl(rax, rax);
 206     __ cpuid();
 207     __ orl(rax, rax);
 208     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 209                                         // value of at least 1, we give up and
 210                                         // assume a 486
 211     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 212     __ movl(Address(rsi, 0), rax);
 213     __ movl(Address(rsi, 4), rbx);
 214     __ movl(Address(rsi, 8), rcx);
 215     __ movl(Address(rsi,12), rdx);
 216 
 217     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 218     __ jccb(Assembler::belowEqual, std_cpuid4);
 219 
 220     //
 221     // cpuid(0xB) Processor Topology
 222     //
 223     __ movl(rax, 0xb);
 224     __ xorl(rcx, rcx);   // Threads level
 225     __ cpuid();
 226 
 227     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 228     __ movl(Address(rsi, 0), rax);
 229     __ movl(Address(rsi, 4), rbx);
 230     __ movl(Address(rsi, 8), rcx);
 231     __ movl(Address(rsi,12), rdx);
 232 
 233     __ movl(rax, 0xb);
 234     __ movl(rcx, 1);     // Cores level
 235     __ cpuid();
 236     __ push(rax);
 237     __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
 239     __ andl(rax, 0xffff);
 240     __ pop(rax);
 241     __ jccb(Assembler::equal, std_cpuid4);
 242 
 243     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 244     __ movl(Address(rsi, 0), rax);
 245     __ movl(Address(rsi, 4), rbx);
 246     __ movl(Address(rsi, 8), rcx);
 247     __ movl(Address(rsi,12), rdx);
 248 
 249     __ movl(rax, 0xb);
 250     __ movl(rcx, 2);     // Packages level
 251     __ cpuid();
 252     __ push(rax);
 253     __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
 255     __ andl(rax, 0xffff);
 256     __ pop(rax);
 257     __ jccb(Assembler::equal, std_cpuid4);
 258 
 259     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 260     __ movl(Address(rsi, 0), rax);
 261     __ movl(Address(rsi, 4), rbx);
 262     __ movl(Address(rsi, 8), rcx);
 263     __ movl(Address(rsi,12), rdx);
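    // Interpreting a 0xB sub-leaf (sketch, mirroring the validity tests
    // above): EAX[4:0] is the x2APIC-id shift for the next level and
    // EBX[15:0] the number of logical processors at this level; both zero
    // means the sub-leaf is invalid.
    //
    //   unsigned shift   = eax & 0x1f;    // x2APIC id shift count
    //   unsigned logical = ebx & 0xffff;  // logical processors at this level
    //   bool     valid   = (shift | logical) != 0;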
 264 
 265     //
 266     // cpuid(0x4) Deterministic cache params
 267     //
 268     __ bind(std_cpuid4);
 269     __ movl(rax, 4);
 270     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 271     __ jccb(Assembler::greater, std_cpuid1);
 272 
 273     __ xorl(rcx, rcx);   // L1 cache
 274     __ cpuid();
 275     __ push(rax);
 276     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 277     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 278     __ pop(rax);
 279     __ jccb(Assembler::equal, std_cpuid1);
 280 
 281     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 282     __ movl(Address(rsi, 0), rax);
 283     __ movl(Address(rsi, 4), rbx);
 284     __ movl(Address(rsi, 8), rcx);
 285     __ movl(Address(rsi,12), rdx);
 286 
 287     //
 288     // Standard cpuid(0x1)
 289     //
 290     __ bind(std_cpuid1);
 291     __ movl(rax, 1);
 292     __ cpuid();
 293     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 294     __ movl(Address(rsi, 0), rax);
 295     __ movl(Address(rsi, 4), rbx);
 296     __ movl(Address(rsi, 8), rcx);
 297     __ movl(Address(rsi,12), rdx);
 298 
 299     //
 300     // Check if OS has enabled XGETBV instruction to access XCR0
 301     // (OSXSAVE feature flag) and CPU supports AVX
 302     //
 303     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 304     __ cmpl(rcx, 0x18000000);
 305     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 306 
 307     //
 308     // XCR0, XFEATURE_ENABLED_MASK register
 309     //
 310     __ xorl(rcx, rcx);   // zero for XCR0 register
 311     __ xgetbv();
 312     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 313     __ movl(Address(rsi, 0), rax);
 314     __ movl(Address(rsi, 4), rdx);
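    // XCR0 bits consumed by the checks in this stub:
    //   bit 1  (0x2)     SSE (XMM) state
    //   bit 2  (0x4)     YMM upper-half state
    //   bits 7:5 (0xE0)  opmask / ZMM0-15 upper / ZMM16-31 state
    //   bit 19 (0x80000) APX extended GPR state
    // e.g. (xcr0 & 0x6) == 0x6 means the OS saves both XMM and YMM state.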
 315 
 316     //
 317     // cpuid(0x7) Structured Extended Features Enumeration Leaf.
 318     //
 319     __ bind(sef_cpuid);
 320     __ movl(rax, 7);
 321     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 322     __ jccb(Assembler::greater, ext_cpuid);
 323     // ECX = 0
 324     __ xorl(rcx, rcx);
 325     __ cpuid();
 326     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 327     __ movl(Address(rsi, 0), rax);
 328     __ movl(Address(rsi, 4), rbx);
 329     __ movl(Address(rsi, 8), rcx);
 330     __ movl(Address(rsi, 12), rdx);
 331 
 332     //
 333     // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
 334     //
 335     __ bind(sefsl1_cpuid);
 336     __ movl(rax, 7);
 337     __ movl(rcx, 1);
 338     __ cpuid();
 339     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 340     __ movl(Address(rsi, 0), rax);
 341     __ movl(Address(rsi, 4), rdx);
 342 
 343     //
 344     // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
 345     //
 346     __ bind(std_cpuid29);
 347     __ movl(rax, 0x29);
 348     __ movl(rcx, 0);
 349     __ cpuid();
 350     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
 351     __ movl(Address(rsi, 0), rbx);
 352 
 353     //
 354     // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
 355     //
 356     __ bind(std_cpuid24);
 357     __ movl(rax, 0x24);
 358     __ movl(rcx, 0);
 359     __ cpuid();
 360     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
 361     __ movl(Address(rsi, 0), rax);
 362     __ movl(Address(rsi, 4), rbx);
 363 
 364     //
 365     // Extended cpuid(0x80000000)
 366     //
 367     __ bind(ext_cpuid);
 368     __ movl(rax, 0x80000000);
 369     __ cpuid();
 370     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 371     __ jcc(Assembler::belowEqual, done);
 372     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 373     __ jcc(Assembler::belowEqual, ext_cpuid1);
 374     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 375     __ jccb(Assembler::belowEqual, ext_cpuid5);
 376     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 377     __ jccb(Assembler::belowEqual, ext_cpuid7);
 378     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 379     __ jccb(Assembler::belowEqual, ext_cpuid8);
 380     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 381     __ jccb(Assembler::below, ext_cpuid8);
 382     //
 383     // Extended cpuid(0x8000001E)
 384     //
 385     __ movl(rax, 0x8000001E);
 386     __ cpuid();
 387     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 388     __ movl(Address(rsi, 0), rax);
 389     __ movl(Address(rsi, 4), rbx);
 390     __ movl(Address(rsi, 8), rcx);
 391     __ movl(Address(rsi,12), rdx);
 392 
 393     //
 394     // Extended cpuid(0x80000008)
 395     //
 396     __ bind(ext_cpuid8);
 397     __ movl(rax, 0x80000008);
 398     __ cpuid();
 399     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 400     __ movl(Address(rsi, 0), rax);
 401     __ movl(Address(rsi, 4), rbx);
 402     __ movl(Address(rsi, 8), rcx);
 403     __ movl(Address(rsi,12), rdx);
 404 
 405     //
 406     // Extended cpuid(0x80000007)
 407     //
 408     __ bind(ext_cpuid7);
 409     __ movl(rax, 0x80000007);
 410     __ cpuid();
 411     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 412     __ movl(Address(rsi, 0), rax);
 413     __ movl(Address(rsi, 4), rbx);
 414     __ movl(Address(rsi, 8), rcx);
 415     __ movl(Address(rsi,12), rdx);
 416 
 417     //
 418     // Extended cpuid(0x80000005)
 419     //
 420     __ bind(ext_cpuid5);
 421     __ movl(rax, 0x80000005);
 422     __ cpuid();
 423     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 424     __ movl(Address(rsi, 0), rax);
 425     __ movl(Address(rsi, 4), rbx);
 426     __ movl(Address(rsi, 8), rcx);
 427     __ movl(Address(rsi,12), rdx);
 428 
 429     //
 430     // Extended cpuid(0x80000001)
 431     //
 432     __ bind(ext_cpuid1);
 433     __ movl(rax, 0x80000001);
 434     __ cpuid();
 435     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 436     __ movl(Address(rsi, 0), rax);
 437     __ movl(Address(rsi, 4), rbx);
 438     __ movl(Address(rsi, 8), rcx);
 439     __ movl(Address(rsi,12), rdx);
 440 
 441     //
 442     // Check if OS has enabled XGETBV instruction to access XCR0
 443     // (OSXSAVE feature flag) and CPU supports APX
 444     //
    // To enable APX, check CPUID.(EAX=07H,ECX=1):EDX[21] for HW support
    // and XCR0[19] for OS support to save/restore extended GPR state.
 447     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 448     __ movl(rax, 0x200000);
 449     __ andl(rax, Address(rsi, 4));
 450     __ jcc(Assembler::equal, vector_save_restore);
 451     // check _cpuid_info.xem_xcr0_eax.bits.apx_f
 452     __ movl(rax, 0x80000);
 453     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
 454     __ jcc(Assembler::equal, vector_save_restore);
 455 
 456     bool save_apx = UseAPX;
 457     VM_Version::set_apx_cpuFeatures();
 458     UseAPX = true;
 459     __ mov64(r16, VM_Version::egpr_test_value());
 460     __ mov64(r31, VM_Version::egpr_test_value());
 461     __ xorl(rsi, rsi);
 462     VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
 463     // Generate SEGV
 464     __ movl(rax, Address(rsi, 0));
 465 
 466     VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
 467     __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
 468     __ movq(Address(rsi, 0), r16);
 469     __ movq(Address(rsi, 8), r31);
 470 
 471     UseAPX = save_apx;
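    // Summary of the APX probe above: r16/r31 are loaded with a sentinel,
    // the null load raises SIGSEGV, and the platform signal handler is
    // expected to clear the EGPRs (via clear_apx_test_state()) before
    // resuming at _cpuinfo_cont_addr_apx. If the values stored to apx_save
    // still equal egpr_test_value(), the OS genuinely saved and restored
    // the extended GPR state across the signal.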
 472     __ bind(vector_save_restore);
 473     //
 474     // Check if OS has enabled XGETBV instruction to access XCR0
 475     // (OSXSAVE feature flag) and CPU supports AVX
 476     //
 477     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 478     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 479     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 480     __ cmpl(rcx, 0x18000000);
 481     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 482 
 483     __ movl(rax, 0x6);
 484     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 485     __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS supports both SSE and YMM state
 487 
 488     // we need to bridge farther than imm8, so we use this island as a thunk
 489     __ bind(done);
 490     __ jmp(wrapup);
 491 
 492     __ bind(start_simd_check);
    //
    // Some OSes have a bug where the upper 128/256 bits of YMM/ZMM
    // registers are not restored after signal processing.
    // Generate a SEGV here (reference through null)
    // and check the upper YMM/ZMM bits after it.
    //
 499     int saved_useavx = UseAVX;
 500     int saved_usesse = UseSSE;
 501 
 502     // If UseAVX is uninitialized or is set by the user to include EVEX
 503     if (use_evex) {
 504       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 505       // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
 506       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 507       __ movl(rax, 0x10000);
 508       __ andl(rax, Address(rsi, 4));
 509       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
 510       __ movl(rbx, 0x80000);
 511       __ andl(rbx, Address(rsi, 4));
 512       __ orl(rax, rbx);
 513       __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
 514       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 515       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 516       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 517       __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
 519       __ cmpl(rax, 0xE0);
 520       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 521 
 522       if (FLAG_IS_DEFAULT(UseAVX)) {
 523         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 524         __ movl(rax, Address(rsi, 0));
 525         __ cmpl(rax, 0x50654);              // If it is Skylake
 526         __ jcc(Assembler::equal, legacy_setup);
 527       }
 528       // EVEX setup: run in lowest evex mode
 529       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 530       UseAVX = 3;
 531       UseSSE = 2;
 532 #ifdef _WINDOWS
      // xmm6-xmm15 are callee-saved on Windows, so save the ones we clobber
 534       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 535       __ subptr(rsp, 64);
 536       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 537       __ subptr(rsp, 64);
 538       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 539       __ subptr(rsp, 64);
 540       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 541 #endif // _WINDOWS
 542 
 543       // load value into all 64 bytes of zmm7 register
 544       __ movl(rcx, VM_Version::ymm_test_value());
 545       __ movdl(xmm0, rcx);
 546       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 547       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 548       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 549       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 550       VM_Version::clean_cpuFeatures();
 551       __ jmp(save_restore_except);
 552     }
 553 
 554     __ bind(legacy_setup);
 555     // AVX setup
 556     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 557     UseAVX = 1;
 558     UseSSE = 2;
 559 #ifdef _WINDOWS
 560     __ subptr(rsp, 32);
 561     __ vmovdqu(Address(rsp, 0), xmm7);
 562     __ subptr(rsp, 32);
 563     __ vmovdqu(Address(rsp, 0), xmm8);
 564     __ subptr(rsp, 32);
 565     __ vmovdqu(Address(rsp, 0), xmm15);
 566 #endif // _WINDOWS
 567 
 568     // load value into all 32 bytes of ymm7 register
 569     __ movl(rcx, VM_Version::ymm_test_value());
 570 
 571     __ movdl(xmm0, rcx);
 572     __ pshufd(xmm0, xmm0, 0x00);
 573     __ vinsertf128_high(xmm0, xmm0);
 574     __ vmovdqu(xmm7, xmm0);
 575     __ vmovdqu(xmm8, xmm0);
 576     __ vmovdqu(xmm15, xmm0);
 577     VM_Version::clean_cpuFeatures();
 578 
 579     __ bind(save_restore_except);
 580     __ xorl(rsi, rsi);
 581     VM_Version::set_cpuinfo_segv_addr(__ pc());
 582     // Generate SEGV
 583     __ movl(rax, Address(rsi, 0));
 584 
 585     VM_Version::set_cpuinfo_cont_addr(__ pc());
 586     // Returns here after signal. Save xmm0 to check it later.
 587 
 588     // If UseAVX is uninitialized or is set by the user to include EVEX
 589     if (use_evex) {
 590       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 591       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 592       __ movl(rax, 0x10000);
 593       __ andl(rax, Address(rsi, 4));
 594       __ jcc(Assembler::equal, legacy_save_restore);
 595       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 596       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 597       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 598       __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
 600       __ cmpl(rax, 0xE0);
 601       __ jcc(Assembler::notEqual, legacy_save_restore);
 602 
 603       if (FLAG_IS_DEFAULT(UseAVX)) {
 604         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 605         __ movl(rax, Address(rsi, 0));
 606         __ cmpl(rax, 0x50654);              // If it is Skylake
 607         __ jcc(Assembler::equal, legacy_save_restore);
 608       }
 609       // EVEX check: run in lowest evex mode
 610       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 611       UseAVX = 3;
 612       UseSSE = 2;
 613       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 614       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 615       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 616       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 617       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 618 
 619 #ifdef _WINDOWS
 620       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 621       __ addptr(rsp, 64);
 622       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 623       __ addptr(rsp, 64);
 624       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 625       __ addptr(rsp, 64);
 626 #endif // _WINDOWS
 627       generate_vzeroupper(wrapup);
 628       VM_Version::clean_cpuFeatures();
 629       UseAVX = saved_useavx;
 630       UseSSE = saved_usesse;
 631       __ jmp(wrapup);
 632    }
 633 
 634     __ bind(legacy_save_restore);
 635     // AVX check
 636     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 637     UseAVX = 1;
 638     UseSSE = 2;
 639     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 640     __ vmovdqu(Address(rsi, 0), xmm0);
 641     __ vmovdqu(Address(rsi, 32), xmm7);
 642     __ vmovdqu(Address(rsi, 64), xmm8);
 643     __ vmovdqu(Address(rsi, 96), xmm15);
 644 
 645 #ifdef _WINDOWS
 646     __ vmovdqu(xmm15, Address(rsp, 0));
 647     __ addptr(rsp, 32);
 648     __ vmovdqu(xmm8, Address(rsp, 0));
 649     __ addptr(rsp, 32);
 650     __ vmovdqu(xmm7, Address(rsp, 0));
 651     __ addptr(rsp, 32);
 652 #endif // _WINDOWS
 653 
 654     generate_vzeroupper(wrapup);
 655     VM_Version::clean_cpuFeatures();
 656     UseAVX = saved_useavx;
 657     UseSSE = saved_usesse;
 658 
 659     __ bind(wrapup);
 660     __ popf();
 661     __ pop(rsi);
 662     __ pop(rbx);
 663     __ pop(rbp);
 664     __ ret(0);
 665 
 666 #   undef __
 667 
 668     return start;
 669   };
 670   void generate_vzeroupper(Label& L_wrapup) {
 671 #   define __ _masm->
 672     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // "Genu", in little-endian byte order
 674     __ jcc(Assembler::notEqual, L_wrapup);
 675     __ movl(rcx, 0x0FFF0FF0);
 676     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 677     __ andl(rcx, Address(rsi, 0));
 678     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 679     __ jcc(Assembler::equal, L_wrapup);
 680     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 681     __ jcc(Assembler::equal, L_wrapup);
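    // The mask 0x0FFF0FF0 keeps the extended family/model and base
    // family/model fields of CPUID(1).EAX while dropping the stepping, so
    // the two compares above match the Xeon Phi signatures at any stepping.
    // Field layout, for reference:
    //
    //   stepping   = eax         & 0xf;   // bits  3:0 (masked out here)
    //   model      = (eax >>  4) & 0xf;   // bits  7:4
    //   family     = (eax >>  8) & 0xf;   // bits 11:8
    //   ext_model  = (eax >> 16) & 0xf;   // bits 19:16
    //   ext_family = (eax >> 20) & 0xff;  // bits 27:20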
 682     // vzeroupper() will use a pre-computed instruction sequence that we
 683     // can't compute until after we've determined CPU capabilities. Use
 684     // uncached variant here directly to be able to bootstrap correctly
 685     __ vzeroupper_uncached();
 686 #   undef __
 687   }
 688   address generate_detect_virt() {
 689     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 690 #   define __ _masm->
 691 
 692     address start = __ pc();
 693 
    // Save callee-saved registers
 695     __ push(rbp);
 696     __ push(rbx);
 697     __ push(rsi); // for Windows
 698 
 699     __ mov(rax, c_rarg0); // CPUID leaf
 700     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 701 
 702     __ cpuid();
 703 
 704     // Store result to register array
 705     __ movl(Address(rsi,  0), rax);
 706     __ movl(Address(rsi,  4), rbx);
 707     __ movl(Address(rsi,  8), rcx);
 708     __ movl(Address(rsi, 12), rdx);
 709 
 710     // Epilogue
 711     __ pop(rsi);
 712     __ pop(rbx);
 713     __ pop(rbp);
 714     __ ret(0);
 715 
 716 #   undef __
 717 
 718     return start;
 719   };
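  // A hypothetical usage sketch: hypervisors expose their identity via
  // CPUID leaf 0x40000000, so a caller could do
  //
  //   uint32_t regs[4];                  // eax, ebx, ecx, edx
  //   detect_virt_stub(0x40000000, regs);
  //   // regs[1..3] then spell out a vendor id such as "KVMKVMKVM".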
 720 
 721 
 722   address generate_getCPUIDBrandString(void) {
 723     // Flags to test CPU type.
 724     const uint32_t HS_EFL_AC           = 0x40000;
 725     const uint32_t HS_EFL_ID           = 0x200000;
 726     // Values for when we don't have a CPUID instruction.
 727     const int      CPU_FAMILY_SHIFT = 8;
 728     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 729     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 730 
 731     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 732 
 733     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 734 #   define __ _masm->
 735 
 736     address start = __ pc();
 737 
 738     //
 739     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 740     //
    // On Windows, rcx and rdx are the first and second argument registers.
 742 
 743     __ push(rbp);
 744     __ mov(rbp, c_rarg0); // cpuid_info address
 745     __ push(rbx);
 746     __ push(rsi);
    __ pushf();          // preserve flags (rbx and rsi were saved above)
 748     __ pop(rax);
 749     __ push(rax);
 750     __ mov(rcx, rax);
 751     //
 752     // if we are unable to change the AC flag, we have a 386
 753     //
 754     __ xorl(rax, HS_EFL_AC);
 755     __ push(rax);
 756     __ popf();
 757     __ pushf();
 758     __ pop(rax);
 759     __ cmpptr(rax, rcx);
 760     __ jccb(Assembler::notEqual, detect_486);
 761 
 762     __ movl(rax, CPU_FAMILY_386);
 763     __ jmp(done);
 764 
 765     //
 766     // If we are unable to change the ID flag, we have a 486 which does
 767     // not support the "cpuid" instruction.
 768     //
 769     __ bind(detect_486);
 770     __ mov(rax, rcx);
 771     __ xorl(rax, HS_EFL_ID);
 772     __ push(rax);
 773     __ popf();
 774     __ pushf();
 775     __ pop(rax);
 776     __ cmpptr(rcx, rax);
 777     __ jccb(Assembler::notEqual, detect_586);
 778 
 779     __ bind(cpu486);
 780     __ movl(rax, CPU_FAMILY_486);
 781     __ jmp(done);
 782 
 783     //
 784     // At this point, we have a chip which supports the "cpuid" instruction
 785     //
 786     __ bind(detect_586);
 787     __ xorl(rax, rax);
 788     __ cpuid();
 789     __ orl(rax, rax);
 790     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 791                                         // value of at least 1, we give up and
 792                                         // assume a 486
 793 
 794     //
 795     // Extended cpuid(0x80000000) for processor brand string detection
 796     //
 797     __ bind(ext_cpuid);
 798     __ movl(rax, CPUID_EXTENDED_FN);
 799     __ cpuid();
 800     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 801     __ jcc(Assembler::below, done);
 802 
 803     //
 804     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 805     //
 806     __ movl(rax, CPUID_EXTENDED_FN_2);
 807     __ cpuid();
 808     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 809     __ movl(Address(rsi, 0), rax);
 810     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 811     __ movl(Address(rsi, 0), rbx);
 812     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 813     __ movl(Address(rsi, 0), rcx);
 814     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 815     __ movl(Address(rsi,0), rdx);
 816 
 817     //
 818     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 819     //
 820     __ movl(rax, CPUID_EXTENDED_FN_3);
 821     __ cpuid();
 822     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 823     __ movl(Address(rsi, 0), rax);
 824     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 825     __ movl(Address(rsi, 0), rbx);
 826     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 827     __ movl(Address(rsi, 0), rcx);
 828     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 829     __ movl(Address(rsi,0), rdx);
 830 
 831     //
 832     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 833     //
 834     __ movl(rax, CPUID_EXTENDED_FN_4);
 835     __ cpuid();
 836     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 837     __ movl(Address(rsi, 0), rax);
 838     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 839     __ movl(Address(rsi, 0), rbx);
 840     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 841     __ movl(Address(rsi, 0), rcx);
 842     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 843     __ movl(Address(rsi,0), rdx);
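    // Leaves 0x80000002..0x80000004 each return 16 ASCII bytes in
    // EAX/EBX/ECX/EDX, so the twelve stores above assemble the full 48-byte
    // processor brand string, e.g. (hypothetical)
    // "Intel(R) Xeon(R) Platinum 8380 CPU @ 2.30GHz".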
 844 
 845     //
 846     // return
 847     //
 848     __ bind(done);
 849     __ popf();
 850     __ pop(rsi);
 851     __ pop(rbx);
 852     __ pop(rbp);
 853     __ ret(0);
 854 
 855 #   undef __
 856 
 857     return start;
 858   };
 859 };
 860 
 861 void VM_Version::get_processor_features() {
 862 
 863   _cpu = 4; // 486 by default
 864   _model = 0;
 865   _stepping = 0;
 866   _logical_processors_per_package = 1;
 867   // i486 internal cache is both I&D and has a 16-byte line size
 868   _L1_data_cache_line_size = 16;
 869 
 870   // Get raw processor info
 871 
 872   get_cpu_info_stub(&_cpuid_info);
 873 
 874   assert_is_initialized();
 875   _cpu = extended_cpu_family();
 876   _model = extended_cpu_model();
 877   _stepping = cpu_stepping();
 878 
 879   if (cpu_family() > 4) { // it supports CPUID
 880     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 881     _cpu_features = _features; // Preserve features
 882     // Logical processors are only available on P4s and above,
 883     // and only if hyperthreading is available.
 884     _logical_processors_per_package = logical_processor_count();
 885     _L1_data_cache_line_size = L1_line_size();
 886   }
 887 
 888   // xchg and xadd instructions
 889   _supports_atomic_getset4 = true;
 890   _supports_atomic_getadd4 = true;
 891   _supports_atomic_getset8 = true;
 892   _supports_atomic_getadd8 = true;
 893 
  // The OS must support SSE on x64, and the hardware must support at least SSE2.
 895   if (!VM_Version::supports_sse2()) {
 896     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 897   }
 898   // in 64 bit the use of SSE2 is the minimum
 899   if (UseSSE < 2) UseSSE = 2;
 900 
  // The flush_icache_stub has to be generated first.
  // That is why the ICache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is verify that the hard-coded
  // ICache::line_size has the correct value.
 909   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 910   // clflush_size is size in quadwords (8 bytes).
 911   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
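  // Worked example: a reported clflush_size of 8 quadwords is 8 * 8 = 64
  // bytes, matching the cache line size hard-coded by ICache::line_size.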
 912 
  // Assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to a non-zero
  // value. It is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // Publish the data cache line flush size to the generic field; otherwise
    // let it default to zero, thereby disabling writeback.
 919     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 920   }
 921 
  // Check if the processor has Intel E-cores
 923   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 924     (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
 925       _model == 0xCC || _model == 0xDD)) {
 926     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 927   }
 928 
 929   if (UseSSE < 4) {
 930     _features.clear_feature(CPU_SSE4_1);
 931     _features.clear_feature(CPU_SSE4_2);
 932   }
 933 
 934   if (UseSSE < 3) {
 935     _features.clear_feature(CPU_SSE3);
 936     _features.clear_feature(CPU_SSSE3);
 937     _features.clear_feature(CPU_SSE4A);
 938   }
 939 
 940   if (UseSSE < 2)
 941     _features.clear_feature(CPU_SSE2);
 942 
 943   if (UseSSE < 1)
 944     _features.clear_feature(CPU_SSE);
 945 
  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
 947   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 948     UseAVX = 0;
 949   }
 950 
 951   // UseSSE is set to the smaller of what hardware supports and what
 952   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 953   // older Pentiums which do not support it.
 954   int use_sse_limit = 0;
 955   if (UseSSE > 0) {
 956     if (UseSSE > 3 && supports_sse4_1()) {
 957       use_sse_limit = 4;
 958     } else if (UseSSE > 2 && supports_sse3()) {
 959       use_sse_limit = 3;
 960     } else if (UseSSE > 1 && supports_sse2()) {
 961       use_sse_limit = 2;
 962     } else if (UseSSE > 0 && supports_sse()) {
 963       use_sse_limit = 1;
 964     } else {
 965       use_sse_limit = 0;
 966     }
 967   }
 968   if (FLAG_IS_DEFAULT(UseSSE)) {
 969     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 970   } else if (UseSSE > use_sse_limit) {
 971     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 972     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 973   }
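  // Worked example: on a CPU with SSE3 but not SSE4.1, -XX:UseSSE=4 yields
  // use_sse_limit == 3, so UseSSE is clamped to 3 with the warning above;
  // a default UseSSE simply becomes 3.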
 974 
 975   // first try initial setting and detect what we can support
 976   int use_avx_limit = 0;
 977   if (UseAVX > 0) {
 978     if (UseSSE < 4) {
 979       // Don't use AVX if SSE is unavailable or has been disabled.
 980       use_avx_limit = 0;
 981     } else if (UseAVX > 2 && supports_evex()) {
 982       use_avx_limit = 3;
 983     } else if (UseAVX > 1 && supports_avx2()) {
 984       use_avx_limit = 2;
 985     } else if (UseAVX > 0 && supports_avx()) {
 986       use_avx_limit = 1;
 987     } else {
 988       use_avx_limit = 0;
 989     }
 990   }
 991   if (FLAG_IS_DEFAULT(UseAVX)) {
 992     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 993     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 994       FLAG_SET_DEFAULT(UseAVX, 2);
 995     } else {
 996       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 997     }
 998   }
 999 
1000   if (UseAVX > use_avx_limit) {
1001     if (UseSSE < 4) {
1002       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1003     } else {
1004       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1005     }
1006     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1007   }
1008 
1009   if (UseAVX < 3) {
1010     _features.clear_feature(CPU_AVX512F);
1011     _features.clear_feature(CPU_AVX512DQ);
1012     _features.clear_feature(CPU_AVX512CD);
1013     _features.clear_feature(CPU_AVX512BW);
1014     _features.clear_feature(CPU_AVX512ER);
1015     _features.clear_feature(CPU_AVX512PF);
1016     _features.clear_feature(CPU_AVX512VL);
1017     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1018     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1019     _features.clear_feature(CPU_AVX512_VAES);
1020     _features.clear_feature(CPU_AVX512_VNNI);
1021     _features.clear_feature(CPU_AVX512_VBMI);
1022     _features.clear_feature(CPU_AVX512_VBMI2);
1023     _features.clear_feature(CPU_AVX512_BITALG);
1024     _features.clear_feature(CPU_AVX512_IFMA);
1025     _features.clear_feature(CPU_APX_F);
1026     _features.clear_feature(CPU_AVX512_FP16);
1027     _features.clear_feature(CPU_AVX10_1);
1028     _features.clear_feature(CPU_AVX10_2);
1029   }
1030 
1031 
1032   if (UseAVX < 2) {
1033     _features.clear_feature(CPU_AVX2);
1034     _features.clear_feature(CPU_AVX_IFMA);
1035   }
1036 
1037   if (UseAVX < 1) {
1038     _features.clear_feature(CPU_AVX);
1039     _features.clear_feature(CPU_VZEROUPPER);
1040     _features.clear_feature(CPU_F16C);
1041     _features.clear_feature(CPU_SHA512);
1042   }
1043 
1044   if (logical_processors_per_package() == 1) {
    // An HT-capable processor could be installed on a system which doesn't support HT.
1046     _features.clear_feature(CPU_HT);
1047   }
1048 
1049   if (is_intel()) { // Intel cpus specific settings
1050     if (is_knights_family()) {
1051       _features.clear_feature(CPU_VZEROUPPER);
1052       _features.clear_feature(CPU_AVX512BW);
1053       _features.clear_feature(CPU_AVX512VL);
1054       _features.clear_feature(CPU_APX_F);
1055       _features.clear_feature(CPU_AVX512DQ);
1056       _features.clear_feature(CPU_AVX512_VNNI);
1057       _features.clear_feature(CPU_AVX512_VAES);
1058       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1059       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1060       _features.clear_feature(CPU_AVX512_VBMI);
1061       _features.clear_feature(CPU_AVX512_VBMI2);
1062       _features.clear_feature(CPU_CLWB);
1063       _features.clear_feature(CPU_FLUSHOPT);
1064       _features.clear_feature(CPU_GFNI);
1065       _features.clear_feature(CPU_AVX512_BITALG);
1066       _features.clear_feature(CPU_AVX512_IFMA);
1067       _features.clear_feature(CPU_AVX_IFMA);
1068       _features.clear_feature(CPU_AVX512_FP16);
1069       _features.clear_feature(CPU_AVX10_1);
1070       _features.clear_feature(CPU_AVX10_2);
1071     }
1072   }
1073 
  // Currently APX support is only enabled for targets supporting the AVX512VL feature.
1075   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1076   if (UseAPX && !apx_supported) {
1077     warning("UseAPX is not supported on this CPU, setting it to false");
1078     FLAG_SET_DEFAULT(UseAPX, false);
1079   }
1080 
1081   if (!UseAPX) {
1082     _features.clear_feature(CPU_APX_F);
1083   }
1084 
1085   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1086     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1087     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1088   } else {
1089     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1090   }
1091 
1092   assert(supports_clflush(), "Always present");
1093   if (X86ICacheSync == -1) {
    // Auto-detect, choosing the most performant option that still flushes
    // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1096     if (supports_clwb()) {
1097       FLAG_SET_ERGO(X86ICacheSync, 3);
1098     } else if (supports_clflushopt()) {
1099       FLAG_SET_ERGO(X86ICacheSync, 2);
1100     } else {
1101       FLAG_SET_ERGO(X86ICacheSync, 1);
1102     }
1103   } else {
1104     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1105       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1106     }
1107     if ((X86ICacheSync == 3) && !supports_clwb()) {
1108       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1109     }
1110     if ((X86ICacheSync == 5) && !supports_serialize()) {
1111       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1112     }
1113   }
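  // For reference, the X86ICacheSync values consumed above ("4" is CPUID,
  // per the comment on auto-detection):
  //   1 -> CLFLUSH    (baseline; always available, see supports_clflush())
  //   2 -> CLFLUSHOPT (requires CPU support)
  //   3 -> CLWB       (requires CPU support; preferred by auto-detection)
  //   4 -> CPUID      5 -> SERIALIZE (requires CPU support)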
1114 
1115   stringStream ss(2048);
1116   if (supports_hybrid()) {
1117     ss.print("(hybrid)");
1118   } else {
1119     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1120   }
1121   ss.print(" family %d model %d stepping %d microcode 0x%x",
1122            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1123   ss.print(", ");
1124   int features_offset = (int)ss.size();
1125   insert_features_names(_features, ss);
1126 
1127   _cpu_info_string = ss.as_string(true);
1128   _features_string = _cpu_info_string + features_offset;
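  // The resulting string looks like, e.g. (hypothetical values):
  //   "(8 cores per cpu, 2 threads per core) family 6 model 158 stepping 10
  //    microcode 0xf4, cmov, cx8, fxsr, mmx, sse, sse2, ..."
  // with _features_string pointing just past the ", " at features_offset,
  // so it lists only the feature names.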
1129 
1130   // Use AES instructions if available.
1131   if (supports_aes()) {
1132     if (FLAG_IS_DEFAULT(UseAES)) {
1133       FLAG_SET_DEFAULT(UseAES, true);
1134     }
1135     if (!UseAES) {
1136       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1137         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1138       }
1139       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1140     } else {
1141       if (UseSSE > 2) {
1142         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1143           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1144         }
1145       } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 or higher for the instructions they use.
1148         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1149           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1150         }
1151         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1152       }
1153 
1154       // --AES-CTR begins--
1155       if (!UseAESIntrinsics) {
1156         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1157           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1158           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1159         }
1160       } else {
1161         if (supports_sse4_1()) {
1162           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1163             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1164           }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
1173       }
1174       // --AES-CTR ends--
1175     }
1176   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1177     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1178       warning("AES instructions are not available on this CPU");
1179       FLAG_SET_DEFAULT(UseAES, false);
1180     }
1181     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1182       warning("AES intrinsics are not available on this CPU");
1183       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1184     }
1185     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1186       warning("AES-CTR intrinsics are not available on this CPU");
1187       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1188     }
1189   }
1190 
1191   // Use CLMUL instructions if available.
1192   if (supports_clmul()) {
1193     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1194       UseCLMUL = true;
1195     }
1196   } else if (UseCLMUL) {
1197     if (!FLAG_IS_DEFAULT(UseCLMUL))
1198       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1199     FLAG_SET_DEFAULT(UseCLMUL, false);
1200   }
1201 
1202   if (UseCLMUL && (UseSSE > 2)) {
1203     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1204       UseCRC32Intrinsics = true;
1205     }
1206   } else if (UseCRC32Intrinsics) {
1207     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 intrinsics require CLMUL instructions (not available on this CPU)");
1209     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1210   }
1211 
1212   if (supports_avx2()) {
1213     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1214       UseAdler32Intrinsics = true;
1215     }
1216   } else if (UseAdler32Intrinsics) {
1217     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 intrinsics require AVX2 instructions (not available on this CPU)");
1219     }
1220     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1221   }
1222 
1223   if (supports_sse4_2() && supports_clmul()) {
1224     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1225       UseCRC32CIntrinsics = true;
1226     }
1227   } else if (UseCRC32CIntrinsics) {
1228     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1229       warning("CRC32C intrinsics are not available on this CPU");
1230     }
1231     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1232   }
1233 
1234   // GHASH/GCM intrinsics
1235   if (UseCLMUL && (UseSSE > 2)) {
1236     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1237       UseGHASHIntrinsics = true;
1238     }
1239   } else if (UseGHASHIntrinsics) {
1240     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsics require CLMUL and SSE2 instructions (not available on this CPU)");
1242     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1243   }
1244 
1245   // ChaCha20 Intrinsics
1246   // As long as the system supports AVX as a baseline we can do a
1247   // SIMD-enabled block function.  StubGenerator makes the determination
1248   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1249   // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
1260 
1261   // Kyber Intrinsics
1262   // Currently we only have them for AVX512
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
      UseKyberIntrinsics = true;
    }
  } else if (UseKyberIntrinsics) {
    warning("Intrinsics for ML-KEM are not available on this CPU.");
    FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
  }
1272 
1273   // Dilithium Intrinsics
  if (UseAVX > 1) {
    if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
      UseDilithiumIntrinsics = true;
    }
  } else if (UseDilithiumIntrinsics) {
    warning("Intrinsics for ML-DSA are not available on this CPU.");
    FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
  }
1282 
1283   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1284   if (UseAVX >= 2) {
1285     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1286       UseBASE64Intrinsics = true;
1287     }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      warning("Base64 intrinsics require AVX2 instructions on this CPU");
    }
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }
1293 
1294   if (supports_fma()) {
1295     if (FLAG_IS_DEFAULT(UseFMA)) {
1296       UseFMA = true;
1297     }
1298   } else if (UseFMA) {
1299     warning("FMA instructions are not available on this CPU");
1300     FLAG_SET_DEFAULT(UseFMA, false);
1301   }
1302 
1303   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1304     UseMD5Intrinsics = true;
1305   }
1306 
1307   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1308     if (FLAG_IS_DEFAULT(UseSHA)) {
1309       UseSHA = true;
1310     }
1311   } else if (UseSHA) {
1312     warning("SHA instructions are not available on this CPU");
1313     FLAG_SET_DEFAULT(UseSHA, false);
1314   }
1315 
1316   if (supports_sha() && supports_sse4_1() && UseSHA) {
1317     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1318       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1319     }
1320   } else if (UseSHA1Intrinsics) {
1321     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1322     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1323   }
1324 
1325   if (supports_sse4_1() && UseSHA) {
1326     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1327       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1328     }
1329   } else if (UseSHA256Intrinsics) {
1330     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1331     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1332   }
1333 
1334   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1335     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1336       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1337     }
1338   } else if (UseSHA512Intrinsics) {
1339     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1340     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1341   }
1342 
  if (supports_evex() && supports_avx512bw()) {
    if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
      UseSHA3Intrinsics = true;
    }
  } else if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }
1351 
1352   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1353     FLAG_SET_DEFAULT(UseSHA, false);
1354   }
1355 
1356 #if COMPILER2_OR_JVMCI
1357   int max_vector_size = 0;
1358   if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16-byte vectors (in XMM) are supported with SSE2+
1360     max_vector_size = 16;
1361   } else if (UseAVX == 1 || UseAVX == 2) {
    // 32-byte vectors (in YMM) are only supported with AVX+
1363     max_vector_size = 32;
1364   } else if (UseAVX > 2) {
    // 64-byte vectors (in ZMM) are only supported with AVX-512 (UseAVX > 2)
1366     max_vector_size = 64;
1367   }
1368 
1369   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1370 
1371   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1372     if (MaxVectorSize < min_vector_size) {
1373       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1374       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1375     }
1376     if (MaxVectorSize > max_vector_size) {
1377       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1378       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1379     }
1380     if (!is_power_of_2(MaxVectorSize)) {
1381       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1382       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1383     }
1384   } else {
1385     // If default, use highest supported configuration
1386     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1387   }
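  // Worked example: with UseAVX == 2 the cap above is 32 bytes, so
  // -XX:MaxVectorSize=64 is clamped to 32 with a warning, while a
  // non-power-of-two request such as 24 is reset to the 32-byte default.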
1388 
1389 #if defined(COMPILER2) && defined(ASSERT)
1390   if (MaxVectorSize > 0) {
1391     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
      tty->print_cr("State of YMM registers after signal handling:");
1393       int nreg = 4;
1394       const char* ymm_name[4] = {"0", "7", "8", "15"};
1395       for (int i = 0; i < nreg; i++) {
1396         tty->print("YMM%s:", ymm_name[i]);
1397         for (int j = 7; j >=0; j--) {
1398           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1399         }
1400         tty->cr();
1401       }
1402     }
1403   }
1404 #endif // COMPILER2 && ASSERT
1405 
1406   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1407     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1408       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1409     }
1410   } else if (UsePoly1305Intrinsics) {
1411     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1412     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1413   }
1414 
1415   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1416     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1417       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1418     }
1419   } else if (UseIntPolyIntrinsics) {
1420     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1421     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1422   }
1423 
1424   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1425     UseMultiplyToLenIntrinsic = true;
1426   }
1427   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1428     UseSquareToLenIntrinsic = true;
1429   }
1430   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1431     UseMulAddIntrinsic = true;
1432   }
1433   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1434     UseMontgomeryMultiplyIntrinsic = true;
1435   }
1436   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1437     UseMontgomerySquareIntrinsic = true;
1438   }
1439 #endif // COMPILER2_OR_JVMCI
1440 
1441   // On newer CPUs, instructions which update the whole XMM register should be used
1442   // to prevent partial-register stalls due to dependencies on the high half.
1443   //
1444   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1445   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1446   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1447   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1448 
1450   if (is_zx()) { // ZX cpus specific settings
1451     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1452       UseStoreImmI16 = false; // don't use it on ZX cpus
1453     }
1454     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1455       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1456         // Use it on all ZX cpus
1457         UseAddressNop = true;
1458       }
1459     }
1460     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1461       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1462     }
1463     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1464       if (supports_sse3()) {
1465         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1466       } else {
1467         UseXmmRegToRegMoveAll = false;
1468       }
1469     }
1470     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1471 #ifdef COMPILER2
1472       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1473         // For new ZX cpus apply the following optimization:
1474         // don't align the beginning of a loop if there are enough instructions
1475         // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
1476         // in the current fetch line (OptoLoopAlignment), or if the padding
1477         // is big (> MaxLoopPad).
1478         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1479         // generated NOP instructions. 11 is the largest size of one
1480         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1481         MaxLoopPad = 11;
1482       }
1483 #endif // COMPILER2
1484       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1485         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1486       }
1487       if (supports_sse4_2()) { // new ZX cpus
1488         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1489           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1490         }
1491       }
1492     }
1493 
1494     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1495       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1496     }
1497   }
1498 
1499   if (is_amd_family()) { // AMD cpus specific settings
1500     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1501       // Use it on new AMD cpus starting from Opteron.
1502       UseAddressNop = true;
1503     }
1504     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1505       // Use it on new AMD cpus starting from Opteron.
1506       UseNewLongLShift = true;
1507     }
1508     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1509       if (supports_sse4a()) {
1510         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1511       } else {
1512         UseXmmLoadAndClearUpper = false;
1513       }
1514     }
1515     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1516       if (supports_sse4a()) {
1517         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1518       } else {
1519         UseXmmRegToRegMoveAll = false;
1520       }
1521     }
1522     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1523       if (supports_sse4a()) {
1524         UseXmmI2F = true;
1525       } else {
1526         UseXmmI2F = false;
1527       }
1528     }
1529     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1530       if (supports_sse4a()) {
1531         UseXmmI2D = true;
1532       } else {
1533         UseXmmI2D = false;
1534       }
1535     }
1536 
1537     // some defaults for AMD family 15h
1538     if (cpu_family() == 0x15) {
1539       // On family 15h processors default is no sw prefetch
1540       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1541         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1542       }
1543       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1544       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1545         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1546       }
1547       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1548       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1549         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1550       }
1551       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1552         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1553       }
1554     }
1555 
1556 #ifdef COMPILER2
1557     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1558       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1559       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1560     }
1561 #endif // COMPILER2
1562 
1563     // Some defaults for AMD family >= 17h && Hygon family 18h
1564     if (cpu_family() >= 0x17) {
1565       // On family >=17h processors use XMM and UnalignedLoadStores
1566       // for Array Copy
1567       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1568         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1569       }
1570       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1571         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1572       }
1573 #ifdef COMPILER2
1574       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1575         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1576       }
1577 #endif
1578     }
1579   }
1580 
1581   if (is_intel()) { // Intel cpus specific settings
1582     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1583       UseStoreImmI16 = false; // don't use it on Intel cpus
1584     }
1585     if (is_intel_server_family() || cpu_family() == 15) {
1586       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1587         // Use it on all Intel cpus starting from PentiumPro
1588         UseAddressNop = true;
1589       }
1590     }
1591     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1592       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1593     }
1594     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1595       if (supports_sse3()) {
1596         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1597       } else {
1598         UseXmmRegToRegMoveAll = false;
1599       }
1600     }
1601     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1602 #ifdef COMPILER2
1603       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1604         // For new Intel cpus apply the following optimization:
1605         // don't align the beginning of a loop if there are enough instructions
1606         // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
1607         // in the current fetch line (OptoLoopAlignment), or if the padding
1608         // is big (> MaxLoopPad).
1609         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1610         // generated NOP instructions. 11 is the largest size of one
1611         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1612         MaxLoopPad = 11;
1613       }
1614 #endif // COMPILER2
1615 
1616       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1617         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1618       }
1619       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1620         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1621           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1622         }
1623       }
1624     }
1625     if (is_atom_family() || is_knights_family()) {
1626 #ifdef COMPILER2
1627       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1628         OptoScheduling = true;
1629       }
1630 #endif
1631       if (supports_sse4_2()) { // Silvermont
1632         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1633           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1634         }
1635       }
1636       if (FLAG_IS_DEFAULT(UseIncDec)) {
1637         FLAG_SET_DEFAULT(UseIncDec, false);
1638       }
1639     }
1640     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1641       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1642     }
1643 #ifdef COMPILER2
1644     if (UseAVX > 2) {
1645       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1646           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1647            ArrayOperationPartialInlineSize != 0 &&
1648            ArrayOperationPartialInlineSize != 16 &&
1649            ArrayOperationPartialInlineSize != 32 &&
1650            ArrayOperationPartialInlineSize != 64)) {
1651         int inline_size = 0;
1652         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1653           inline_size = 64;
1654         } else if (MaxVectorSize >= 32) {
1655           inline_size = 32;
1656         } else if (MaxVectorSize >= 16) {
1657           inline_size = 16;
1658         }
1659         if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1660           warning("Setting ArrayOperationPartialInlineSize to %d", inline_size);
1661         }
1662         ArrayOperationPartialInlineSize = inline_size;
1663       }
1664 
1665       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1666         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1667         if (ArrayOperationPartialInlineSize) {
1668           warning("Setting ArrayOperationPartialInlineSize to MaxVectorSize=%zd", MaxVectorSize);
1669         } else {
1670           warning("Setting ArrayOperationPartialInlineSize to %zd", ArrayOperationPartialInlineSize);
1671         }
1672       }
1673     }
1674 #endif
1675   }
1676 
1677 #ifdef COMPILER2
1678   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1679     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1680       OptimizeFill = false;
1681     }
1682   }
1683 #endif
1684   if (supports_sse4_2()) {
1685     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1686       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1687     }
1688   } else {
1689     if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1690       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1691     }
1692     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1693   }
1694   if (UseSSE42Intrinsics) {
1695     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1696       UseVectorizedMismatchIntrinsic = true;
1697     }
1698   } else if (UseVectorizedMismatchIntrinsic) {
1699     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1700       warning("vectorizedMismatch intrinsics are not available on this CPU");
1701     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1702   }
1703   if (UseAVX >= 2) {
1704     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1705   } else if (UseVectorizedHashCodeIntrinsic) {
1706     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1707       warning("vectorizedHashCode intrinsics are not available on this CPU");
1708     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1709   }
1710 
1711   // Use the count-leading-zeros (lzcnt) instruction if available.
1712   if (supports_lzcnt()) {
1713     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1714       UseCountLeadingZerosInstruction = true;
1715     }
1716   } else if (UseCountLeadingZerosInstruction) {
1717     warning("lzcnt instruction is not available on this CPU");
1718     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1719   }
1720 
1721   // Use count trailing zeros instruction if available
1722   if (supports_bmi1()) {
1723     // tzcnt does not require VEX prefix
1724     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1725       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1726         // Don't use tzcnt if BMI1 is switched off on command line.
1727         UseCountTrailingZerosInstruction = false;
1728       } else {
1729         UseCountTrailingZerosInstruction = true;
1730       }
1731     }
1732   } else if (UseCountTrailingZerosInstruction) {
1733     warning("tzcnt instruction is not available on this CPU");
1734     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1735   }
1736 
1737   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1738   // VEX prefix is generated only when AVX > 0.
1739   if (supports_bmi1() && supports_avx()) {
1740     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1741       UseBMI1Instructions = true;
1742     }
1743   } else if (UseBMI1Instructions) {
1744     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1745     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1746   }
1747 
1748   if (supports_bmi2() && supports_avx()) {
1749     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1750       UseBMI2Instructions = true;
1751     }
1752   } else if (UseBMI2Instructions) {
1753     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1754     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1755   }
1756 
1757   // Use population count instruction if available.
1758   if (supports_popcnt()) {
1759     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1760       UsePopCountInstruction = true;
1761     }
1762   } else if (UsePopCountInstruction) {
1763     warning("POPCNT instruction is not available on this CPU");
1764     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1765   }
1766 
1767   // Use fast-string operations if available.
1768   if (supports_erms()) {
1769     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1770       UseFastStosb = true;
1771     }
1772   } else if (UseFastStosb) {
1773     warning("fast-string operations are not available on this CPU");
1774     FLAG_SET_DEFAULT(UseFastStosb, false);
1775   }
1776 
1777   // For AMD processors use XMM/YMM MOVDQU instructions
1778   // for Object Initialization as the default
1779   if (is_amd() && cpu_family() >= 0x19) {
1780     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1781       UseFastStosb = false;
1782     }
1783   }
1784 
1785 #ifdef COMPILER2
1786   if (is_intel() && MaxVectorSize > 16) {
1787     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1788       UseFastStosb = false;
1789     }
1790   }
1791 #endif
1792 
1793   // Use XMM/YMM MOVDQU instruction for Object Initialization
1794   if (!UseFastStosb && UseUnalignedLoadStores) {
1795     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1796       UseXMMForObjInit = true;
1797     }
1798   } else if (UseXMMForObjInit) {
1799     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1800     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1801   }
1802 
1803 #ifdef COMPILER2
1804   if (FLAG_IS_DEFAULT(AlignVector)) {
1805     // Modern processors allow misaligned memory operations for vectors.
1806     AlignVector = !UseUnalignedLoadStores;
1807   }
1808 #endif // COMPILER2
1809 
1810   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1811     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1812       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1813     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1814       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1815     }
1816   }
1817 
1818   // Allocation prefetch settings
1819   int cache_line_size = checked_cast<int>(prefetch_data_size());
1820   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1821       (cache_line_size > AllocatePrefetchStepSize)) {
1822     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1823   }
1824 
1825   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1826     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1827     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1828       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1829     }
1830     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1831   }
1832 
1833   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1834     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1835     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1836   }
1837 
1838   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1839     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1840         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1841       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1842     }
1843 #ifdef COMPILER2
1844     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1845       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1846     }
1847 #endif
1848   }
1849 
1850   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1851 #ifdef COMPILER2
1852     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1853       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1854     }
1855 #endif
1856   }
1857 
1858   // Prefetch settings
1859 
1860   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1861   // 50-warehouse SPECjbb runs on a 2-way 1.8 GHz Opteron using a 4 GB heap.
1862   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1863   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1864 
1865   // gc copy/scan is disabled if prefetchw isn't supported, because
1866   // Prefetch::write emits an inlined prefetchw on Linux.
1867   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1868   // The prefetcht0 instruction used instead works for both amd64 and em64t.
1869 
1870   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1871     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1872   }
1873   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1874     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1875   }
1876 
1877   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1878      (cache_line_size > ContendedPaddingWidth))
1879      ContendedPaddingWidth = cache_line_size;
1880 
1881   // This machine allows unaligned memory accesses
1882   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1883     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1884   }
1885 
1886 #ifndef PRODUCT
1887   if (log_is_enabled(Info, os, cpu)) {
1888     LogStream ls(Log(os, cpu)::info());
1889     outputStream* log = &ls;
1890     log->print_cr("Logical CPUs per core: %u",
1891                   logical_processors_per_package());
1892     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1893     log->print("UseSSE=%d", UseSSE);
1894     if (UseAVX > 0) {
1895       log->print("  UseAVX=%d", UseAVX);
1896     }
1897     if (UseAES) {
1898       log->print("  UseAES=1");
1899     }
1900 #ifdef COMPILER2
1901     if (MaxVectorSize > 0) {
1902       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1903     }
1904 #endif
1905     log->cr();
1906     log->print("Allocation");
1907     if (AllocatePrefetchStyle <= 0) {
1908       log->print_cr(": no prefetching");
1909     } else {
1910       log->print(" prefetching: ");
1911       if (AllocatePrefetchInstr == 0) {
1912         log->print("PREFETCHNTA");
1913       } else if (AllocatePrefetchInstr == 1) {
1914         log->print("PREFETCHT0");
1915       } else if (AllocatePrefetchInstr == 2) {
1916         log->print("PREFETCHT2");
1917       } else if (AllocatePrefetchInstr == 3) {
1918         log->print("PREFETCHW");
1919       }
1920       if (AllocatePrefetchLines > 1) {
1921         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1922       } else {
1923         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1924       }
1925     }
1926 
1927     if (PrefetchCopyIntervalInBytes > 0) {
1928       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1929     }
1930     if (PrefetchScanIntervalInBytes > 0) {
1931       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1932     }
1933     if (ContendedPaddingWidth > 0) {
1934       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1935     }
1936   }
1937 #endif // !PRODUCT
1938   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1939     FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1940   }
1941   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1942     FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1943   }
1944 }
1945 
1946 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1947   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1948   if (vrt == XenHVM) {
1949     st->print_cr("Xen hardware-assisted virtualization detected");
1950   } else if (vrt == KVM) {
1951     st->print_cr("KVM virtualization detected");
1952   } else if (vrt == VMWare) {
1953     st->print_cr("VMWare virtualization detected");
1954     VirtualizationSupport::print_virtualization_info(st);
1955   } else if (vrt == HyperV) {
1956     st->print_cr("Hyper-V virtualization detected");
1957   } else if (vrt == HyperVRole) {
1958     st->print_cr("Hyper-V role detected");
1959   }
1960 }
1961 
1962 bool VM_Version::compute_has_intel_jcc_erratum() {
1963   if (!is_intel_family_core()) {
1964     // Only Intel CPUs are affected.
1965     return false;
1966   }
1967   // The following table of affected CPUs is based on the following document released by Intel:
1968   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1969   switch (_model) {
1970   case 0x8E:
1971     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1972     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1973     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1974     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1975     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1976     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1977     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1978     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1979     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1980     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1981   case 0x4E:
1982     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1983     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1984     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1985     return _stepping == 0x3;
1986   case 0x55:
1987     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1988     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1989     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1990     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1991     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1992     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1993     return _stepping == 0x4 || _stepping == 0x7;
1994   case 0x5E:
1995     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1996     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1997     return _stepping == 0x3;
1998   case 0x9E:
1999     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2000     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2001     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2002     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2003     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2004     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2005     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2006     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2007     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2008     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2009     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2010     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2011     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2012     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2013     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2014   case 0xA5:
2015     // Not in Intel documentation.
2016     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2017     return true;
2018   case 0xA6:
2019     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2020     return _stepping == 0x0;
2021   case 0xAE:
2022     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2023     return _stepping == 0xA;
2024   default:
2025     // If we are running on another Intel machine not recognized in the table, we are okay.
2026     return false;
2027   }
2028 }
2029 
2030 // On Xen, the cpuid instruction returns
2031 //  eax / registers[0]: Version of Xen
2032 //  ebx / registers[1]: chars 'XenV'
2033 //  ecx / registers[2]: chars 'MMXe'
2034 //  edx / registers[3]: chars 'nVMM'
2035 //
2036 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2037 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2038 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2039 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2040 //
2041 // more information :
2042 // https://kb.vmware.com/s/article/1009458
2043 //
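     // Illustrative sketch (not executed here) of how the 12-byte signature is
     // assembled from the stub output; e.g. on KVM, ebx/ecx/edx hold
     // 'KVMK' / 'VMKV' / 'M\0\0\0', which concatenate to "KVMKVMKVM":
     //
     //   uint32_t regs[4];
     //   detect_virt_stub(0x40000000, regs);   // base leaf, as in the loop below
     //   char sig[13];
     //   memcpy(sig, &regs[1], 12);            // copy ebx, ecx, edx
     //   sig[12] = '\0';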
2044 void VM_Version::check_virtualizations() {
2045   uint32_t registers[4] = {0};
2046   char signature[13] = {0};
2047 
2048   // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
2049   // from 0x40000000 up to 0x40010000.
2050   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2051   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2052     detect_virt_stub(leaf, registers);
2053     memcpy(signature, &registers[1], 12);
2054 
2055     if (strncmp("VMwareVMware", signature, 12) == 0) {
2056       Abstract_VM_Version::_detected_virtualization = VMWare;
2057       // check for extended metrics from guestlib
2058       VirtualizationSupport::initialize();
2059     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2060       Abstract_VM_Version::_detected_virtualization = HyperV;
2061 #ifdef _WINDOWS
2062       // CPUID leaf 0x40000007 is available to the root partition only.
2063       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2064       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2065       detect_virt_stub(0x40000007, registers);
2066       if ((registers[0] != 0x0) ||
2067           (registers[1] != 0x0) ||
2068           (registers[2] != 0x0) ||
2069           (registers[3] != 0x0)) {
2070         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2071       }
2072 #endif
2073     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2074       Abstract_VM_Version::_detected_virtualization = KVM;
2075     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2076       Abstract_VM_Version::_detected_virtualization = XenHVM;
2077     }
2078   }
2079 }
2080 
2081 #ifdef COMPILER2
2082 // Determine if it's running on Cascade Lake using default options.
2083 bool VM_Version::is_default_intel_cascade_lake() {
2084   return FLAG_IS_DEFAULT(UseAVX) &&
2085          FLAG_IS_DEFAULT(MaxVectorSize) &&
2086          UseAVX > 2 &&
2087          is_intel_cascade_lake();
2088 }
2089 #endif
2090 
2091 bool VM_Version::is_intel_cascade_lake() {
2092   return is_intel_skylake() && _stepping >= 5;
2093 }
2094 
2095 bool VM_Version::is_intel_darkmont() {
2096   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2097 }
2098 
2099 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2100 // for implementing the array copy and clear operations.
2101 // The Intel platforms that support the serialize instruction
2102 // have an improved implementation of 64-byte load/stores and so the default
2103 // threshold is set to 0 for these platforms.
2104 int VM_Version::avx3_threshold() {
2105   return (is_intel_server_family() &&
2106           supports_serialize() &&
2107           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2108 }
2109 
2110 void VM_Version::clear_apx_test_state() {
2111   clear_apx_test_state_stub();
2112 }
2113 
2114 static bool _vm_version_initialized = false;
2115 
2116 void VM_Version::initialize() {
2117   ResourceMark rm;
2118 
2119   // Making this stub must be the FIRST use of the assembler
2120   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2121   if (stub_blob == nullptr) {
2122     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2123   }
2124   CodeBuffer c(stub_blob);
2125   VM_Version_StubGenerator g(&c);
2126 
2127   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2128                                      g.generate_get_cpu_info());
2129   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2130                                      g.generate_detect_virt());
2131   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2132                                      g.clear_apx_test_state());
2133   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2134                                      g.generate_getCPUIDBrandString());
2135   get_processor_features();
2136 
2137   Assembler::precompute_instructions();
2138 
2139   if (VM_Version::supports_hv()) { // Supports hypervisor
2140     check_virtualizations();
2141   }
2142   _vm_version_initialized = true;
2143 }
2144 
2145 typedef enum {
2146    CPU_FAMILY_8086_8088  = 0,
2147    CPU_FAMILY_INTEL_286  = 2,
2148    CPU_FAMILY_INTEL_386  = 3,
2149    CPU_FAMILY_INTEL_486  = 4,
2150    CPU_FAMILY_PENTIUM    = 5,
2151    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2152    CPU_FAMILY_PENTIUM_4  = 0xF
2153 } FamilyFlag;
2154 
2155 typedef enum {
2156   RDTSCP_FLAG  = 0x08000000, // bit 27
2157   INTEL64_FLAG = 0x20000000  // bit 29
2158 } _featureExtendedEdxFlag;
2159 
2160 typedef enum {
2161    FPU_FLAG     = 0x00000001,
2162    VME_FLAG     = 0x00000002,
2163    DE_FLAG      = 0x00000004,
2164    PSE_FLAG     = 0x00000008,
2165    TSC_FLAG     = 0x00000010,
2166    MSR_FLAG     = 0x00000020,
2167    PAE_FLAG     = 0x00000040,
2168    MCE_FLAG     = 0x00000080,
2169    CX8_FLAG     = 0x00000100,
2170    APIC_FLAG    = 0x00000200,
2171    SEP_FLAG     = 0x00000800,
2172    MTRR_FLAG    = 0x00001000,
2173    PGE_FLAG     = 0x00002000,
2174    MCA_FLAG     = 0x00004000,
2175    CMOV_FLAG    = 0x00008000,
2176    PAT_FLAG     = 0x00010000,
2177    PSE36_FLAG   = 0x00020000,
2178    PSNUM_FLAG   = 0x00040000,
2179    CLFLUSH_FLAG = 0x00080000,
2180    DTS_FLAG     = 0x00200000,
2181    ACPI_FLAG    = 0x00400000,
2182    MMX_FLAG     = 0x00800000,
2183    FXSR_FLAG    = 0x01000000,
2184    SSE_FLAG     = 0x02000000,
2185    SSE2_FLAG    = 0x04000000,
2186    SS_FLAG      = 0x08000000,
2187    HTT_FLAG     = 0x10000000,
2188    TM_FLAG      = 0x20000000
2189 } FeatureEdxFlag;
2190 
2191 // VM_Version statics
2192 enum {
2193   ExtendedFamilyIdLength_INTEL = 16,
2194   ExtendedFamilyIdLength_AMD   = 24
2195 };
2196 
2197 const size_t VENDOR_LENGTH = 13;
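     // Extended brand string: 3 CPUID leaves (0x80000002..0x80000004), each
     // returning 4 registers of 4 bytes, plus a terminating NUL.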
2198 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2199 static char* _cpu_brand_string = nullptr;
2200 static int64_t _max_qualified_cpu_frequency = 0;
2201 
2202 static int _no_of_threads = 0;
2203 static int _no_of_cores = 0;
2204 
2205 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2206   "8086/8088",
2207   "",
2208   "286",
2209   "386",
2210   "486",
2211   "Pentium",
2212   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2213   "",
2214   "",
2215   "",
2216   "",
2217   "",
2218   "",
2219   "",
2220   "",
2221   "Pentium 4"
2222 };
2223 
2224 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2225   "",
2226   "",
2227   "",
2228   "",
2229   "5x86",
2230   "K5/K6",
2231   "Athlon/AthlonXP",
2232   "",
2233   "",
2234   "",
2235   "",
2236   "",
2237   "",
2238   "",
2239   "",
2240   "Opteron/Athlon64",
2241   "Opteron QC/Phenom",  // Barcelona et.al.
2242   "",
2243   "",
2244   "",
2245   "",
2246   "",
2247   "",
2248   "Zen"
2249 };
2250 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2251 // September 2013, Vol 3C Table 35-1
2252 const char* const _model_id_pentium_pro[] = {
2253   "",
2254   "Pentium Pro",
2255   "",
2256   "Pentium II model 3",
2257   "",
2258   "Pentium II model 5/Xeon/Celeron",
2259   "Celeron",
2260   "Pentium III/Pentium III Xeon",
2261   "Pentium III/Pentium III Xeon",
2262   "Pentium M model 9",    // Yonah
2263   "Pentium III, model A",
2264   "Pentium III, model B",
2265   "",
2266   "Pentium M model D",    // Dothan
2267   "",
2268   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2269   "",
2270   "",
2271   "",
2272   "",
2273   "",
2274   "",
2275   "Celeron",              // 0x16 Celeron 65nm
2276   "Core 2",               // 0x17 Penryn / Harpertown
2277   "",
2278   "",
2279   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2280   "Atom",                 // 0x1B Z5xx series Silverthorn
2281   "",
2282   "Core 2",               // 0x1D Dunnington (6-core)
2283   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2284   "",
2285   "",
2286   "",
2287   "",
2288   "",
2289   "",
2290   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2291   "",
2292   "",
2293   "",                     // 0x28
2294   "",
2295   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2296   "",
2297   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2298   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2299   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2300   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2301   "",
2302   "",
2303   "",
2304   "",
2305   "",
2306   "",
2307   "",
2308   "",
2309   "",
2310   "",
2311   "Ivy Bridge",           // 0x3a
2312   "",
2313   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2314   "",                     // 0x3d "Next Generation Intel Core Processor"
2315   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2316   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2317   "",
2318   "",
2319   "",
2320   "",
2321   "",
2322   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2323   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2324   nullptr
2325 };
2326 
2327 /* Brand ID is for backward compatibility;
2328  * newer CPUs use the extended brand string. */
2329 const char* const _brand_id[] = {
2330   "",
2331   "Celeron processor",
2332   "Pentium III processor",
2333   "Intel Pentium III Xeon processor",
2334   "",
2335   "",
2336   "",
2337   "",
2338   "Intel Pentium 4 processor",
2339   nullptr
2340 };
2341 
2342 
2343 const char* const _feature_edx_id[] = {
2344   "On-Chip FPU",
2345   "Virtual Mode Extensions",
2346   "Debugging Extensions",
2347   "Page Size Extensions",
2348   "Time Stamp Counter",
2349   "Model Specific Registers",
2350   "Physical Address Extension",
2351   "Machine Check Exceptions",
2352   "CMPXCHG8B Instruction",
2353   "On-Chip APIC",
2354   "",
2355   "Fast System Call",
2356   "Memory Type Range Registers",
2357   "Page Global Enable",
2358   "Machine Check Architecture",
2359   "Conditional Mov Instruction",
2360   "Page Attribute Table",
2361   "36-bit Page Size Extension",
2362   "Processor Serial Number",
2363   "CLFLUSH Instruction",
2364   "",
2365   "Debug Trace Store feature",
2366   "ACPI registers in MSR space",
2367   "Intel Architecture MMX Technology",
2368   "Fast Float Point Save and Restore",
2369   "Streaming SIMD extensions",
2370   "Streaming SIMD extensions 2",
2371   "Self-Snoop",
2372   "Hyper Threading",
2373   "Thermal Monitor",
2374   "",
2375   "Pending Break Enable"
2376 };
2377 
2378 const char* const _feature_extended_edx_id[] = {
2379   "",
2380   "",
2381   "",
2382   "",
2383   "",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "SYSCALL/SYSRET",
2391   "",
2392   "",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "",
2399   "Execute Disable Bit",
2400   "",
2401   "",
2402   "",
2403   "",
2404   "",
2405   "",
2406   "RDTSCP",
2407   "",
2408   "Intel 64 Architecture",
2409   "",
2410   ""
2411 };
2412 
2413 const char* const _feature_ecx_id[] = {
2414   "Streaming SIMD Extensions 3",
2415   "PCLMULQDQ",
2416   "64-bit DS Area",
2417   "MONITOR/MWAIT instructions",
2418   "CPL Qualified Debug Store",
2419   "Virtual Machine Extensions",
2420   "Safer Mode Extensions",
2421   "Enhanced Intel SpeedStep technology",
2422   "Thermal Monitor 2",
2423   "Supplemental Streaming SIMD Extensions 3",
2424   "L1 Context ID",
2425   "",
2426   "Fused Multiply-Add",
2427   "CMPXCHG16B",
2428   "xTPR Update Control",
2429   "Perfmon and Debug Capability",
2430   "",
2431   "Process-context identifiers",
2432   "Direct Cache Access",
2433   "Streaming SIMD extensions 4.1",
2434   "Streaming SIMD extensions 4.2",
2435   "x2APIC",
2436   "MOVBE",
2437   "Popcount instruction",
2438   "TSC-Deadline",
2439   "AESNI",
2440   "XSAVE",
2441   "OSXSAVE",
2442   "AVX",
2443   "F16C",
2444   "RDRAND",
2445   ""
2446 };
2447 
2448 const char* const _feature_extended_ecx_id[] = {
2449   "LAHF/SAHF instruction support",
2450   "Core multi-processor legacy mode",
2451   "",
2452   "",
2453   "",
2454   "Advanced Bit Manipulations: LZCNT",
2455   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2456   "Misaligned SSE mode",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "",
2476   "",
2477   "",
2478   "",
2479   "",
2480   ""
2481 };
2482 
2483 const char* VM_Version::cpu_model_description(void) {
2484   uint32_t cpu_family = extended_cpu_family();
2485   uint32_t cpu_model = extended_cpu_model();
2486   const char* model = nullptr;
2487 
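       // _model_id_pentium_pro is a nullptr-terminated table indexed by model
       // number; the walk below stops at the sentinel, so an out-of-range model
       // yields nullptr instead of reading past the end of the array.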
2488   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2489     for (uint32_t i = 0; i <= cpu_model; i++) {
2490       model = _model_id_pentium_pro[i];
2491       if (model == nullptr) {
2492         break;
2493       }
2494     }
2495   }
2496   return model;
2497 }
2498 
2499 const char* VM_Version::cpu_brand_string(void) {
2500   if (_cpu_brand_string == nullptr) {
2501     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2502     if (nullptr == _cpu_brand_string) {
2503       return nullptr;
2504     }
2505     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2506     if (ret_val != OS_OK) {
2507       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2508       _cpu_brand_string = nullptr;
2509     }
2510   }
2511   return _cpu_brand_string;
2512 }
2513 
2514 const char* VM_Version::cpu_brand(void) {
2515   const char*  brand  = nullptr;
2516 
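       // The low byte of EBX from CPUID leaf 1 is the legacy brand index; walk
       // the nullptr-terminated _brand_id table up to that index.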
2517   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2518     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2519     brand = _brand_id[0];
2520     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2521       brand = _brand_id[i];
2522     }
2523   }
2524   return brand;
2525 }
2526 
2527 bool VM_Version::cpu_is_em64t(void) {
2528   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2529 }
2530 
2531 bool VM_Version::is_netburst(void) {
2532   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2533 }
2534 
2535 bool VM_Version::supports_tscinv_ext(void) {
2536   if (!supports_tscinv_bit()) {
2537     return false;
2538   }
2539 
2540   if (is_intel()) {
2541     return true;
2542   }
2543 
2544   if (is_amd()) {
2545     return !is_amd_Barcelona();
2546   }
2547 
2548   if (is_hygon()) {
2549     return true;
2550   }
2551 
2552   return false;
2553 }
2554 
2555 void VM_Version::resolve_cpu_information_details(void) {
2556 
2557   // In the future we want to base this information on proper cpu
2558   // and cache topology enumeration, such as
2559   // Intel 64 Architecture Processor Topology Enumeration,
2560   // which supports system cpu and cache topology enumeration
2561   // using either x2APIC IDs or initial APIC IDs.
2562 
2563   // Currently we make only rough cpu information estimates
2564   // which will not necessarily reflect the exact configuration of the system.
2565 
2566   // this is the number of logical hardware threads
2567   // visible to the operating system
2568   _no_of_threads = os::processor_count();
2569 
2570   // find out number of threads per cpu package
2571   int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2572   if (threads_per_package == 0) {
2573     // Fallback code to avoid division by zero in subsequent code.
2574     // CPUID 0Bh (ECX = 1) might return 0 on older AMD processors (EPYC 7763, at least).
2575     threads_per_package = threads_per_core() * cores_per_cpu();
2576   }
2577 
2578   // use the number of threads visible to the process to estimate the number of sockets
2579   _no_of_sockets = _no_of_threads / threads_per_package;
2580 
2581   // The process might only see a subset of the total number of threads
2582   // from a single processor package (under virtualization or resource management,
2583   // for example). If so, just report a single package.
2584   if (0 == _no_of_sockets) {
2585     _no_of_sockets = 1;
2586   }
2587 
2588   // estimate the number of cores
2589   _no_of_cores = cores_per_cpu() * _no_of_sockets;
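       // Worked example (illustrative): a 2-socket machine with 8 cores per
       // socket and 2 threads per core reports _no_of_threads == 32 and
       // threads_per_package == 16, giving _no_of_sockets == 2 and
       // _no_of_cores == 16.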
2590 }
2591 
2592 
2593 const char* VM_Version::cpu_family_description(void) {
2594   int cpu_family_id = extended_cpu_family();
2595   if (is_amd()) {
2596     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2597       return _family_id_amd[cpu_family_id];
2598     }
2599   }
2600   if (is_intel()) {
2601     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2602       return cpu_model_description();
2603     }
2604     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2605       return _family_id_intel[cpu_family_id];
2606     }
2607   }
2608   if (is_hygon()) {
2609     return "Dhyana";
2610   }
2611   return "Unknown x86";
2612 }
2613 
2614 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2615   assert(buf != nullptr, "buffer is null!");
2616   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2617 
2618   const char* cpu_type = nullptr;
2619   const char* x64 = nullptr;
2620 
2621   if (is_intel()) {
2622     cpu_type = "Intel";
2623     x64 = cpu_is_em64t() ? " Intel64" : "";
2624   } else if (is_amd()) {
2625     cpu_type = "AMD";
2626     x64 = cpu_is_em64t() ? " AMD64" : "";
2627   } else if (is_hygon()) {
2628     cpu_type = "Hygon";
2629     x64 = cpu_is_em64t() ? " AMD64" : "";
2630   } else {
2631     cpu_type = "Unknown x86";
2632     x64 = cpu_is_em64t() ? " x86_64" : "";
2633   }
2634 
2635   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2636     cpu_type,
2637     cpu_family_description(),
2638     supports_ht() ? " (HT)" : "",
2639     supports_sse3() ? " SSE3" : "",
2640     supports_ssse3() ? " SSSE3" : "",
2641     supports_sse4_1() ? " SSE4.1" : "",
2642     supports_sse4_2() ? " SSE4.2" : "",
2643     supports_sse4a() ? " SSE4A" : "",
2644     is_netburst() ? " Netburst" : "",
2645     is_intel_family_core() ? " Core" : "",
2646     x64);
2647 
2648   return OS_OK;
2649 }
2650 
2651 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2652   assert(buf != nullptr, "buffer is null!");
2653   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2654   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2655 
2656   // invoke newly generated asm code to fetch CPU Brand String
2657   getCPUIDBrandString_stub(&_cpuid_info);
2658 
2659   // fetch results into buffer
2660   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2661   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2662   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2663   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2664   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2665   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2666   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2667   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2668   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2669   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2670   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2671   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2672 
2673   return OS_OK;
2674 }
2675 
2676 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2677   guarantee(buf != nullptr, "buffer is null!");
2678   guarantee(buf_len > 0, "buffer len not enough!");
2679 
2680   unsigned int flag = 0;
2681   unsigned int fi = 0;
2682   size_t       written = 0;
2683   const char*  prefix = "";
2684 
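     // WRITE_TO_BUF appends 'string' to buf, inserting ", " before every item
     // after the first; if jio_snprintf fails, the enclosing function returns
     // buf_len - 1, i.e. the buffer is reported as full.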
2685 #define WRITE_TO_BUF(string)                                                          \
2686   {                                                                                   \
2687     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2688     if (res < 0) {                                                                    \
2689       return buf_len - 1;                                                             \
2690     }                                                                                 \
2691     written += res;                                                                   \
2692     if (prefix[0] == '\0') {                                                          \
2693       prefix = ", ";                                                                  \
2694     }                                                                                 \
2695   }
2696 
2697   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2698     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2699       continue; /* no hyperthreading */
2700     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2701       continue; /* no fast system call */
2702     }
2703     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2704       WRITE_TO_BUF(_feature_edx_id[fi]);
2705     }
2706   }
2707 
2708   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2709     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2710       WRITE_TO_BUF(_feature_ecx_id[fi]);
2711     }
2712   }
2713 
2714   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2715     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2716       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2717     }
2718   }
2719 
2720   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2721     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2722       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2723     }
2724   }
2725 
2726   if (supports_tscinv_bit()) {
2727     WRITE_TO_BUF("Invariant TSC");
2728   }
2729
2730   if (supports_hybrid()) {
2731     WRITE_TO_BUF("Hybrid Architecture");
2732   }
2733 
2734   return written;
2735 }
2736 
2737 /**
2738  * Write a detailed description of the cpu to a given buffer, including
2739  * feature set.
2740  */
2741 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2742   assert(buf != nullptr, "buffer is null!");
2743   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2744 
2745   static const char* unknown = "<unknown>";
2746   char               vendor_id[VENDOR_LENGTH];
2747   const char*        family = nullptr;
2748   const char*        model = nullptr;
2749   const char*        brand = nullptr;
2750   int                outputLen = 0;
2751 
2752   family = cpu_family_description();
2753   if (family == nullptr) {
2754     family = unknown;
2755   }
2756 
2757   model = cpu_model_description();
2758   if (model == nullptr) {
2759     model = unknown;
2760   }
2761 
2762   brand = cpu_brand_string();
2763 
2764   if (brand == nullptr) {
2765     brand = cpu_brand();
2766     if (brand == nullptr) {
2767       brand = unknown;
2768     }
2769   }
2770 
2771   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2772   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2773   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2774   vendor_id[VENDOR_LENGTH-1] = '\0';
2775 
2776   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2777     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2778     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2779     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2780     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2781     "Supports: ",
2782     brand,
2783     vendor_id,
2784     family,
2785     extended_cpu_family(),
2786     model,
2787     extended_cpu_model(),
2788     cpu_stepping(),
2789     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2790     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2791     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2792     _cpuid_info.std_cpuid1_eax.value,
2793     _cpuid_info.std_cpuid1_ebx.value,
2794     _cpuid_info.std_cpuid1_ecx.value,
2795     _cpuid_info.std_cpuid1_edx.value,
2796     _cpuid_info.ext_cpuid1_eax,
2797     _cpuid_info.ext_cpuid1_ebx,
2798     _cpuid_info.ext_cpuid1_ecx,
2799     _cpuid_info.ext_cpuid1_edx);
2800 
2801   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2802     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2803     return OS_ERR;
2804   }
2805 
2806   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2807 
2808   return OS_OK;
2809 }
2810 
2811 
2812 // Fill in Abstract_VM_Version statics
2813 void VM_Version::initialize_cpu_information() {
2814   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2815   assert(!_initialized, "shouldn't be initialized yet");
2816   resolve_cpu_information_details();
2817 
2818   // initialize cpu_name and cpu_desc
2819   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2820   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2821   _initialized = true;
2822 }
2823 
2824 /**
2825  *  For information about extracting the frequency from the cpu brand string, please see:
2826  *
2827  *    Intel Processor Identification and the CPUID Instruction
2828  *    Application Note 485
2829  *    May 2012
2830  *
2831  * The return value is the frequency in Hz.
2832  */
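     // Worked example (illustrative): for a brand string ending in "3.40GHz",
     // the scan below stops with idx at 'G', multiplier == 10^9, and the
     // "x.xx" branch computes 3*10^9 + 4*10^8 + 0*10^7 == 3,400,000,000 Hz.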
2833 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2834   const char* const brand_string = cpu_brand_string();
2835   if (brand_string == nullptr) {
2836     return 0;
2837   }
2838   const int64_t MEGA = 1000000;
2839   int64_t multiplier = 0;
2840   int64_t frequency = 0;
2841   uint8_t idx = 0;
2842   // The brand string buffer is at most 48 bytes.
2843   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2844   for (; idx < 48-2; ++idx) {
2845     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2846     // Search brand string for "yHz" where y is M, G, or T.
2847     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2848       if (brand_string[idx] == 'M') {
2849         multiplier = MEGA;
2850       } else if (brand_string[idx] == 'G') {
2851         multiplier = MEGA * 1000;
2852       } else if (brand_string[idx] == 'T') {
2853         multiplier = MEGA * MEGA;
2854       }
2855       break;
2856     }
2857   }
2858   if (multiplier > 0) {
2859     // Compute frequency (in Hz) from brand string.
2860     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2861       frequency =  (brand_string[idx-4] - '0') * multiplier;
2862       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2863       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2864     } else { // format is "xxxx"
2865       frequency =  (brand_string[idx-4] - '0') * 1000;
2866       frequency += (brand_string[idx-3] - '0') * 100;
2867       frequency += (brand_string[idx-2] - '0') * 10;
2868       frequency += (brand_string[idx-1] - '0');
2869       frequency *= multiplier;
2870     }
2871   }
2872   return frequency;
2873 }
2874 
2875 
2876 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2877   if (_max_qualified_cpu_frequency == 0) {
2878     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2879   }
2880   return _max_qualified_cpu_frequency;
2881 }
2882 
2883 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2884   VM_Features vm_features;
2885   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2886     vm_features.set_feature(CPU_CX8);
2887   if (std_cpuid1_edx.bits.cmov != 0)
2888     vm_features.set_feature(CPU_CMOV);
2889   if (std_cpuid1_edx.bits.clflush != 0)
2890     vm_features.set_feature(CPU_FLUSH);
2891   // clflush should always be available on x86_64
2892   // if not we are in real trouble because we rely on it
2893   // to flush the code cache.
2894   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2895   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2896       ext_cpuid1_edx.bits.fxsr != 0))
2897     vm_features.set_feature(CPU_FXSR);
2898   // HT flag is set for multi-core processors also.
2899   if (threads_per_core() > 1)
2900     vm_features.set_feature(CPU_HT);
2901   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2902       ext_cpuid1_edx.bits.mmx != 0))
2903     vm_features.set_feature(CPU_MMX);
2904   if (std_cpuid1_edx.bits.sse != 0)
2905     vm_features.set_feature(CPU_SSE);
2906   if (std_cpuid1_edx.bits.sse2 != 0)
2907     vm_features.set_feature(CPU_SSE2);
2908   if (std_cpuid1_ecx.bits.sse3 != 0)
2909     vm_features.set_feature(CPU_SSE3);
2910   if (std_cpuid1_ecx.bits.ssse3 != 0)
2911     vm_features.set_feature(CPU_SSSE3);
2912   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2913     vm_features.set_feature(CPU_SSE4_1);
2914   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2915     vm_features.set_feature(CPU_SSE4_2);
2916   if (std_cpuid1_ecx.bits.popcnt != 0)
2917     vm_features.set_feature(CPU_POPCNT);
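  // APX is usable only when the CPU advertises APX_F, the OS has enabled
  // save/restore of the extended GPR state in XCR0, and leaf 0x29 reports
  // the NCI/NDD/NF instruction forms.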
2918   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2919       xem_xcr0_eax.bits.apx_f != 0 &&
2920       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2921     vm_features.set_feature(CPU_APX_F);
2922   }
2923   if (std_cpuid1_ecx.bits.avx != 0 &&
2924       std_cpuid1_ecx.bits.osxsave != 0 &&
2925       xem_xcr0_eax.bits.sse != 0 &&
2926       xem_xcr0_eax.bits.ymm != 0) {
2927     vm_features.set_feature(CPU_AVX);
2928     vm_features.set_feature(CPU_VZEROUPPER);
2929     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2930       vm_features.set_feature(CPU_SHA512);
2931     if (std_cpuid1_ecx.bits.f16c != 0)
2932       vm_features.set_feature(CPU_F16C);
2933     if (sef_cpuid7_ebx.bits.avx2 != 0) {
2934       vm_features.set_feature(CPU_AVX2);
2935       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2936         vm_features.set_feature(CPU_AVX_IFMA);
2937     }
2938     if (sef_cpuid7_ecx.bits.gfni != 0)
      vm_features.set_feature(CPU_GFNI);
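    // AVX-512 additionally requires the OS to have enabled the opmask and
    // upper-ZMM state components in XCR0; without OS save/restore support
    // the wide registers would not survive context switches.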
2940     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2941         xem_xcr0_eax.bits.opmask != 0 &&
2942         xem_xcr0_eax.bits.zmm512 != 0 &&
2943         xem_xcr0_eax.bits.zmm32 != 0) {
2944       vm_features.set_feature(CPU_AVX512F);
2945       if (sef_cpuid7_ebx.bits.avx512cd != 0)
2946         vm_features.set_feature(CPU_AVX512CD);
2947       if (sef_cpuid7_ebx.bits.avx512dq != 0)
2948         vm_features.set_feature(CPU_AVX512DQ);
2949       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2950         vm_features.set_feature(CPU_AVX512_IFMA);
2951       if (sef_cpuid7_ebx.bits.avx512pf != 0)
2952         vm_features.set_feature(CPU_AVX512PF);
2953       if (sef_cpuid7_ebx.bits.avx512er != 0)
2954         vm_features.set_feature(CPU_AVX512ER);
2955       if (sef_cpuid7_ebx.bits.avx512bw != 0)
2956         vm_features.set_feature(CPU_AVX512BW);
2957       if (sef_cpuid7_ebx.bits.avx512vl != 0)
2958         vm_features.set_feature(CPU_AVX512VL);
2959       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2960         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2961       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2962         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2963       if (sef_cpuid7_ecx.bits.vaes != 0)
2964         vm_features.set_feature(CPU_AVX512_VAES);
2965       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2966         vm_features.set_feature(CPU_AVX512_VNNI);
2967       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2968         vm_features.set_feature(CPU_AVX512_BITALG);
2969       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2970         vm_features.set_feature(CPU_AVX512_VBMI);
2971       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2972         vm_features.set_feature(CPU_AVX512_VBMI2);
2973     }
2974     if (is_intel()) {
2975       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
          std_cpuid24_ebx.bits.avx10_vlen_512 != 0 &&
2977           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2978           xem_xcr0_eax.bits.opmask != 0 &&
2979           xem_xcr0_eax.bits.zmm512 != 0 &&
2980           xem_xcr0_eax.bits.zmm32 != 0) {
2981         vm_features.set_feature(CPU_AVX10_1);
2982         vm_features.set_feature(CPU_AVX512F);
2983         vm_features.set_feature(CPU_AVX512CD);
2984         vm_features.set_feature(CPU_AVX512DQ);
2985         vm_features.set_feature(CPU_AVX512PF);
2986         vm_features.set_feature(CPU_AVX512ER);
2987         vm_features.set_feature(CPU_AVX512BW);
2988         vm_features.set_feature(CPU_AVX512VL);
2989         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2990         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2991         vm_features.set_feature(CPU_AVX512_VAES);
2992         vm_features.set_feature(CPU_AVX512_VNNI);
2993         vm_features.set_feature(CPU_AVX512_BITALG);
2994         vm_features.set_feature(CPU_AVX512_VBMI);
2995         vm_features.set_feature(CPU_AVX512_VBMI2);
2996         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2997           vm_features.set_feature(CPU_AVX10_2);
2998         }
2999       }
3000     }
3001   }
3002 
3003   if (std_cpuid1_ecx.bits.hv != 0)
3004     vm_features.set_feature(CPU_HV);
3005   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3006     vm_features.set_feature(CPU_BMI1);
3007   if (std_cpuid1_edx.bits.tsc != 0)
3008     vm_features.set_feature(CPU_TSC);
3009   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3010     vm_features.set_feature(CPU_TSCINV_BIT);
3011   if (std_cpuid1_ecx.bits.aes != 0)
3012     vm_features.set_feature(CPU_AES);
3013   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3014     vm_features.set_feature(CPU_LZCNT);
3015   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3016     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3017   if (sef_cpuid7_ebx.bits.erms != 0)
3018     vm_features.set_feature(CPU_ERMS);
3019   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3020     vm_features.set_feature(CPU_FSRM);
3021   if (std_cpuid1_ecx.bits.clmul != 0)
3022     vm_features.set_feature(CPU_CLMUL);
3023   if (sef_cpuid7_ebx.bits.rtm != 0)
3024     vm_features.set_feature(CPU_RTM);
3025   if (sef_cpuid7_ebx.bits.adx != 0)
    vm_features.set_feature(CPU_ADX);
3027   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3028     vm_features.set_feature(CPU_BMI2);
3029   if (sef_cpuid7_ebx.bits.sha != 0)
3030     vm_features.set_feature(CPU_SHA);
3031   if (std_cpuid1_ecx.bits.fma != 0)
3032     vm_features.set_feature(CPU_FMA);
3033   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3034     vm_features.set_feature(CPU_FLUSHOPT);
3035   if (sef_cpuid7_ebx.bits.clwb != 0)
3036     vm_features.set_feature(CPU_CLWB);
3037   if (ext_cpuid1_edx.bits.rdtscp != 0)
3038     vm_features.set_feature(CPU_RDTSCP);
3039   if (sef_cpuid7_ecx.bits.rdpid != 0)
3040     vm_features.set_feature(CPU_RDPID);
3041 
3042   // AMD|Hygon additional features.
3043   if (is_amd_family()) {
3044     // PREFETCHW was checked above, check TDNOW here.
    if (ext_cpuid1_edx.bits.tdnow != 0)
3046       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3047     if (ext_cpuid1_ecx.bits.sse4a != 0)
3048       vm_features.set_feature(CPU_SSE4A);
3049   }
3050 
3051   // Intel additional features.
3052   if (is_intel()) {
3053     if (sef_cpuid7_edx.bits.serialize != 0)
3054       vm_features.set_feature(CPU_SERIALIZE);
3055     if (sef_cpuid7_edx.bits.hybrid != 0)
3056       vm_features.set_feature(CPU_HYBRID);
    if (sef_cpuid7_edx.bits.avx512_fp16 != 0)
3058       vm_features.set_feature(CPU_AVX512_FP16);
3059   }
3060 
3061   // ZX additional features.
3062   if (is_zx()) {
    // We do not know whether CLWB is actually supported on ZX parts, so we
    // cannot trust the common CPUID bit for it.
3065     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3066     vm_features.clear_feature(CPU_CLWB);
3067   }
3068 
3069   // Protection key features.
3070   if (sef_cpuid7_ecx.bits.pku != 0) {
3071     vm_features.set_feature(CPU_PKU);
3072   }
3073   if (sef_cpuid7_ecx.bits.ospke != 0) {
3074     vm_features.set_feature(CPU_OSPKE);
3075   }
3076 
3077   // Control flow enforcement (CET) features.
3078   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3079     vm_features.set_feature(CPU_CET_SS);
3080   }
3081   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3082     vm_features.set_feature(CPU_CET_IBT);
3083   }
3084 
3085   // Composite features.
3086   if (supports_tscinv_bit() &&
3087       ((is_amd_family() && !is_amd_Barcelona()) ||
3088        is_intel_tsc_synched_at_init())) {
3089     vm_features.set_feature(CPU_TSCINV);
3090   }
3091   return vm_features;
3092 }
3093 
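// Check that the OS preserves the full width of the vector registers across
// signal handling: the cpuid stub fills the ymm/zmm registers with
// ymm_test_value(), takes a SEGV (see _cpuinfo_segv_addr), and the register
// contents are recorded afterwards. Any clobbered lane means the OS save
// area is too small and AVX/EVEX code generation must not be used.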
3094 bool VM_Version::os_supports_avx_vectors() {
3095   bool retVal = false;
3096   int nreg = 4;
3097   if (supports_evex()) {
3098     // Verify that OS save/restore all bits of EVEX registers
3099     // during signal processing.
3100     retVal = true;
3101     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3102       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3103         retVal = false;
3104         break;
3105       }
3106     }
3107   } else if (supports_avx()) {
3108     // Verify that OS save/restore all bits of AVX registers
3109     // during signal processing.
3110     retVal = true;
3111     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3112       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3113         retVal = false;
3114         break;
3115       }
3116     }
    // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen
    if (!retVal) {
3119       // Verify that OS save/restore all bits of EVEX registers
3120       // during signal processing.
3121       retVal = true;
3122       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3123         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3124           retVal = false;
3125           break;
3126         }
3127       }
3128     }
3129   }
3130   return retVal;
3131 }
3132 
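// Analogous check for APX: the extended GPRs were filled with
// egpr_test_value() by the stub and must have survived signal handling
// before APX code generation can rely on them.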
3133 bool VM_Version::os_supports_apx_egprs() {
3134   if (!supports_apx_f()) {
3135     return false;
3136   }
3137   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3138       _cpuid_info.apx_save[1] != egpr_test_value()) {
3139     return false;
3140   }
3141   return true;
3142 }
3143 
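// Number of cores per physical package. With topology enumeration
// (CPUID leaf 0Bh) this is the ratio of logical processors at the core
// level to logical processors at the SMT level, e.g. 16 / 2 = 8 cores.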
3144 uint VM_Version::cores_per_cpu() {
3145   uint result = 1;
3146   if (is_intel()) {
3147     bool supports_topology = supports_processor_topology();
3148     if (supports_topology) {
3149       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3150                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3151     }
3152     if (!supports_topology || result == 0) {
3153       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3154     }
3155   } else if (is_amd_family()) {
3156     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3157     if (cpu_family() >= 0x17) { // Zen or later
3158       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3159     }
3160   } else if (is_zx()) {
3161     bool supports_topology = supports_processor_topology();
3162     if (supports_topology) {
3163       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3164                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3165     }
3166     if (!supports_topology || result == 0) {
3167       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3168     }
3169   }
3170   return result;
3171 }
3172 
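// Number of hardware threads per core, derived from topology enumeration
// where available and otherwise from the legacy HT fields.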
3173 uint VM_Version::threads_per_core() {
3174   uint result = 1;
3175   if (is_intel() && supports_processor_topology()) {
3176     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3177   } else if (is_zx() && supports_processor_topology()) {
3178     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3179   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3180     if (cpu_family() >= 0x17) {
3181       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3182     } else {
3183       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3184                  cores_per_cpu();
3185     }
3186   }
3187   return (result == 0 ? 1 : result);
3188 }
3189 
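// L1 data cache line size in bytes, taken from the vendor-specific cache
// descriptor leaves and clamped to a 32-byte minimum below.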
3190 uint VM_Version::L1_line_size() {
3191   uint result = 0;
3192   if (is_intel()) {
3193     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3194   } else if (is_amd_family()) {
3195     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3196   } else if (is_zx()) {
3197     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3198   }
  if (result < 32) // not defined?
    result = 32;   // 32 bytes by default on x86 and x64
3201   return result;
3202 }
3203 
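// Whether the TSCs of all packages are known to be synchronized at
// initialization on this Intel part, making the invariant TSC usable as a
// stable time source on multi-socket systems.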
3204 bool VM_Version::is_intel_tsc_synched_at_init() {
3205   if (is_intel_family_core()) {
3206     uint32_t ext_model = extended_cpu_model();
3207     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3208         ext_model == CPU_MODEL_WESTMERE_EP    ||
3209         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3210         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3211       // <= 2-socket invariant tsc support. EX versions are usually used
3212       // in > 2-socket systems and likely don't synchronize tscs at
3213       // initialization.
3214       // Code that uses tsc values must be prepared for them to arbitrarily
3215       // jump forward or backward.
3216       return true;
3217     }
3218   }
3219   return false;
3220 }
3221 
3222 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3223   // Hardware prefetching (distance/size in bytes):
3224   // Pentium 3 -  64 /  32
3225   // Pentium 4 - 256 / 128
3226   // Athlon    -  64 /  32 ????
3227   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3228   // Core      - 128 /  64
3229   //
3230   // Software prefetching (distance in bytes / instruction with best score):
3231   // Pentium 3 - 128 / prefetchnta
3232   // Pentium 4 - 512 / prefetchnta
3233   // Athlon    - 128 / prefetchnta
3234   // Opteron   - 256 / prefetchnta
3235   // Core      - 256 / prefetchnta
  // This distance is used only when AllocatePrefetchStyle > 0.
3237 
3238   if (is_amd_family()) { // AMD | Hygon
3239     if (supports_sse2()) {
3240       return 256; // Opteron
3241     } else {
3242       return 128; // Athlon
3243     }
3244   } else { // Intel
3245     if (supports_sse3() && is_intel_server_family()) {
3246       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3247         return 192;
3248       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3249         return 384;
3250       }
3251     }
3252     if (supports_sse2()) {
3253       if (is_intel_server_family()) {
3254         return 256; // Pentium M, Core, Core2
3255       } else {
3256         return 512; // Pentium 4
3257       }
3258     } else {
3259       return 128; // Pentium 3 (and all other old CPUs)
3260     }
3261   }
3262 }
3263 
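// Returns false only for intrinsics whose hardware prerequisites are
// missing; all other intrinsics are considered supported on x86.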
3264 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3265   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3266   switch (id) {
3267   case vmIntrinsics::_floatToFloat16:
3268   case vmIntrinsics::_float16ToFloat:
3269     if (!supports_float16()) {
3270       return false;
3271     }
3272     break;
3273   default:
3274     break;
3275   }
3276   return true;
3277 }
3278 
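// Append the names of all features present in 'features' to 'ss' as a
// comma-separated list. The lambda acts as a generator: each call returns
// the next supported feature name, or nullptr once all flags are exhausted.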
3279 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3280   int i = 0;
3281   ss.join([&]() {
3282     while (i < MAX_CPU_FEATURES) {
      if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3284         return _features_names[i++];
3285       }
3286       i += 1;
3287     }
3288     return (const char*)nullptr;
3289   }, ", ");
3290 }