1 /*
   2  * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "asm/macroAssembler.hpp"
  26 #include "asm/macroAssembler.inline.hpp"
  27 #include "classfile/vmIntrinsics.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/checkedCast.hpp"
  41 #include "utilities/ostream.hpp"
  42 #include "utilities/powerOfTwo.hpp"
  43 #include "utilities/virtualizationSupport.hpp"
  44 
// CPU identification fields filled in from the raw CPUID data.
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
// True when the CPU is affected by the Intel JCC (jump-conditional-code) erratum.
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID results captured by the get_cpu_info stub; zero-initialized.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Build the table of human-readable feature names from the central
// CPU_FEATURE_FLAGS x-macro list, one stringified name per feature bit.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Buffer holding the generated CPU-detection stubs and its (fixed) size.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// Number of 64-bit words in the features bitmap.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

// Features actually enabled for the VM, and the full set the CPU supports.
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// Entry-point signatures of the generated stubs (C calling convention).
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
  82 
  83 bool VM_Version::supports_clflush() {
  84   // clflush should always be available on x86_64
  85   // if not we are in real trouble because we rely on it
  86   // to flush the code cache.
  87   // Unfortunately, Assembler::clflush is currently called as part
  88   // of generation of the code cache flush routine. This happens
  89   // under Universe::init before the processor features are set
  90   // up. Assembler::flush calls this routine to check that clflush
  91   // is allowed. So, we give the caller a free pass if Universe init
  92   // is still in progress.
  93   assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  94   return true;
  95 }
  96 
// CPUID leaf (function) numbers used by the stubs below.
// Standard leaves (EAX input values):
#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves (0x80000000 reports the highest supported extended leaf;
// 0x80000002-0x80000004 return the processor brand string).
#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
 109 
// Generator for the small stubs used to probe CPU capabilities at VM startup.
// Each generate_* method emits machine code into the supplied CodeBuffer and
// returns its entry address; the stubs are executed once, before the feature
// flags are finalized.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emits a stub that zeroes the extended GPRs r16 and r31.
  // EGPRs are call-clobbered registers; explicitly clearing r16 and r31 before
  // signal handling guarantees that register values found preserved after
  // signal handling were re-instantiated by the operating system, and not
  // merely left unmodified.
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();

    // Temporarily enable APX so the assembler accepts EGPR operands.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Emits the stub that interrogates the processor via CPUID (and XGETBV)
  // and records the raw results into the VM_Version::CpuidInfo record passed
  // as the stub's only argument. It also probes, by taking a deliberate SEGV,
  // whether the OS correctly preserves extended register state (YMM/ZMM and,
  // when present, APX EGPRs) across signal handling.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    // Exercise the EVEX paths when UseAVX is at its default or set above 2.
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx); // only EBX is recorded for this leaf

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // Temporarily enable APX so the assembler accepts EGPR operands while we
    // test that r16/r31 survive a signal (SEGV) round trip.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    // Returns here after the signal; record r16/r31 so they can be compared
    // against the test value later.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    //
    // Query CPUID 0xD.19 for APX XSAVE offset
    // Extended State Enumeration Sub-leaf 19 (APX)
    // EAX = size of APX state (should be 128)
    // EBX = offset in standard XSAVE format
    //
    __ movl(rax, 0xD);
    __ movl(rcx, 19);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
    __ movl(Address(rsi, 0), rbx);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // run the SIMD save/restore check
                                                 // only if the OS enabled sse|ymm state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore xmm registers saved above, in reverse push order.
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
   }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore xmm registers saved above, in reverse push order.
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };

  // Emits a vzeroupper unless the CPU reports a Xeon Phi model, in which case
  // control transfers to L_wrapup instead (vzeroupper is skipped there).
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG' -- "Genu", start of "GenuineIntel", little-endian
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }

  // Emits a stub that executes CPUID for the leaf given in the first argument
  // and stores eax/ebx/ecx/edx into the 4-element uint32_t array passed as the
  // second argument. Used to query hypervisor (virtualization) leaves.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Emits a stub that reads the 48-byte processor brand string via extended
  // CPUID leaves 0x80000002..0x80000004 into the proc_name_* slots of the
  // CpuidInfo record passed as the stub's only argument. On pre-CPUID chips
  // (386/486) the string is left untouched.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
 874 
 875 void VM_Version::get_processor_features() {
 876 
 877   _cpu = 4; // 486 by default
 878   _model = 0;
 879   _stepping = 0;
 880   _logical_processors_per_package = 1;
 881   // i486 internal cache is both I&D and has a 16-byte line size
 882   _L1_data_cache_line_size = 16;
 883 
 884   // Get raw processor info
 885 
 886   get_cpu_info_stub(&_cpuid_info);
 887 
 888   assert_is_initialized();
 889   _cpu = extended_cpu_family();
 890   _model = extended_cpu_model();
 891   _stepping = cpu_stepping();
 892 
 893   if (cpu_family() > 4) { // it supports CPUID
 894     _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
 895     _cpu_features = _features; // Preserve features
 896     // Logical processors are only available on P4s and above,
 897     // and only if hyperthreading is available.
 898     _logical_processors_per_package = logical_processor_count();
 899     _L1_data_cache_line_size = L1_line_size();
 900   }
 901 
 902   // xchg and xadd instructions
 903   _supports_atomic_getset4 = true;
 904   _supports_atomic_getadd4 = true;
 905   _supports_atomic_getset8 = true;
 906   _supports_atomic_getadd8 = true;
 907 
 908   // OS should support SSE for x64 and hardware should support at least SSE2.
 909   if (!VM_Version::supports_sse2()) {
 910     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 911   }
 912   // in 64 bit the use of SSE2 is the minimum
 913   if (UseSSE < 2) UseSSE = 2;
 914 
 915   // flush_icache_stub have to be generated first.
 916   // That is why Icache line size is hard coded in ICache class,
 917   // see icache_x86.hpp. It is also the reason why we can't use
 918   // clflush instruction in 32-bit VM since it could be running
 919   // on CPU which does not support it.
 920   //
 921   // The only thing we can do is to verify that flushed
 922   // ICache::line_size has correct value.
 923   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 924   // clflush_size is size in quadwords (8 bytes).
 925   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 926 
 927   // assigning this field effectively enables Unsafe.writebackMemory()
 928   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 929   // that is only implemented on x86_64 and only if the OS plays ball
 930   if (os::supports_map_sync()) {
 931     // publish data cache line flush size to generic field, otherwise
 932     // let if default to zero thereby disabling writeback
 933     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 934   }
 935 
 936   // Check if processor has Intel Ecore
 937   if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
 938     (supports_hybrid() ||
 939      _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
 940      _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
 941     FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
 942   }
 943 
 944   if (UseSSE < 4) {
 945     _features.clear_feature(CPU_SSE4_1);
 946     _features.clear_feature(CPU_SSE4_2);
 947   }
 948 
 949   if (UseSSE < 3) {
 950     _features.clear_feature(CPU_SSE3);
 951     _features.clear_feature(CPU_SSSE3);
 952     _features.clear_feature(CPU_SSE4A);
 953   }
 954 
 955   if (UseSSE < 2)
 956     _features.clear_feature(CPU_SSE2);
 957 
 958   if (UseSSE < 1)
 959     _features.clear_feature(CPU_SSE);
 960 
 961   // ZX cpus specific settings
 962   if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
 963     if (cpu_family() == 7) {
 964       if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
 965         UseAVX = 1;
 966       } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
 967         UseAVX = 0;
 968       }
 969     } else if (cpu_family() == 6) {
 970       UseAVX = 0;
 971     }
 972   }
 973 
 974   // UseSSE is set to the smaller of what hardware supports and what
 975   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 976   // older Pentiums which do not support it.
 977   int use_sse_limit = 0;
 978   if (UseSSE > 0) {
 979     if (UseSSE > 3 && supports_sse4_1()) {
 980       use_sse_limit = 4;
 981     } else if (UseSSE > 2 && supports_sse3()) {
 982       use_sse_limit = 3;
 983     } else if (UseSSE > 1 && supports_sse2()) {
 984       use_sse_limit = 2;
 985     } else if (UseSSE > 0 && supports_sse()) {
 986       use_sse_limit = 1;
 987     } else {
 988       use_sse_limit = 0;
 989     }
 990   }
 991   if (FLAG_IS_DEFAULT(UseSSE)) {
 992     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 993   } else if (UseSSE > use_sse_limit) {
 994     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 995     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 996   }
 997 
 998   // first try initial setting and detect what we can support
 999   int use_avx_limit = 0;
1000   if (UseAVX > 0) {
1001     if (UseSSE < 4) {
1002       // Don't use AVX if SSE is unavailable or has been disabled.
1003       use_avx_limit = 0;
1004     } else if (UseAVX > 2 && supports_evex()) {
1005       use_avx_limit = 3;
1006     } else if (UseAVX > 1 && supports_avx2()) {
1007       use_avx_limit = 2;
1008     } else if (UseAVX > 0 && supports_avx()) {
1009       use_avx_limit = 1;
1010     } else {
1011       use_avx_limit = 0;
1012     }
1013   }
1014   if (FLAG_IS_DEFAULT(UseAVX)) {
1015     // Don't use AVX-512 on older Skylakes unless explicitly requested.
1016     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1017       FLAG_SET_DEFAULT(UseAVX, 2);
1018     } else {
1019       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1020     }
1021   }
1022 
1023   if (UseAVX > use_avx_limit) {
1024     if (UseSSE < 4) {
1025       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1026     } else {
1027       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1028     }
1029     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1030   }
1031 
1032   if (UseAVX < 3) {
1033     _features.clear_feature(CPU_AVX512F);
1034     _features.clear_feature(CPU_AVX512DQ);
1035     _features.clear_feature(CPU_AVX512CD);
1036     _features.clear_feature(CPU_AVX512BW);
1037     _features.clear_feature(CPU_AVX512ER);
1038     _features.clear_feature(CPU_AVX512PF);
1039     _features.clear_feature(CPU_AVX512VL);
1040     _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1041     _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1042     _features.clear_feature(CPU_AVX512_VAES);
1043     _features.clear_feature(CPU_AVX512_VNNI);
1044     _features.clear_feature(CPU_AVX512_VBMI);
1045     _features.clear_feature(CPU_AVX512_VBMI2);
1046     _features.clear_feature(CPU_AVX512_BITALG);
1047     _features.clear_feature(CPU_AVX512_IFMA);
1048     _features.clear_feature(CPU_APX_F);
1049     _features.clear_feature(CPU_AVX512_FP16);
1050     _features.clear_feature(CPU_AVX10_1);
1051     _features.clear_feature(CPU_AVX10_2);
1052   }
1053 
1054 
1055   if (UseAVX < 2) {
1056     _features.clear_feature(CPU_AVX2);
1057     _features.clear_feature(CPU_AVX_IFMA);
1058   }
1059 
1060   if (UseAVX < 1) {
1061     _features.clear_feature(CPU_AVX);
1062     _features.clear_feature(CPU_VZEROUPPER);
1063     _features.clear_feature(CPU_F16C);
1064     _features.clear_feature(CPU_SHA512);
1065   }
1066 
1067   if (logical_processors_per_package() == 1) {
1068     // HT processor could be installed on a system which doesn't support HT.
1069     _features.clear_feature(CPU_HT);
1070   }
1071 
1072   if (is_intel()) { // Intel cpus specific settings
1073     if (is_knights_family()) {
1074       _features.clear_feature(CPU_VZEROUPPER);
1075       _features.clear_feature(CPU_AVX512BW);
1076       _features.clear_feature(CPU_AVX512VL);
1077       _features.clear_feature(CPU_APX_F);
1078       _features.clear_feature(CPU_AVX512DQ);
1079       _features.clear_feature(CPU_AVX512_VNNI);
1080       _features.clear_feature(CPU_AVX512_VAES);
1081       _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1082       _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1083       _features.clear_feature(CPU_AVX512_VBMI);
1084       _features.clear_feature(CPU_AVX512_VBMI2);
1085       _features.clear_feature(CPU_CLWB);
1086       _features.clear_feature(CPU_FLUSHOPT);
1087       _features.clear_feature(CPU_GFNI);
1088       _features.clear_feature(CPU_AVX512_BITALG);
1089       _features.clear_feature(CPU_AVX512_IFMA);
1090       _features.clear_feature(CPU_AVX_IFMA);
1091       _features.clear_feature(CPU_AVX512_FP16);
1092       _features.clear_feature(CPU_AVX10_1);
1093       _features.clear_feature(CPU_AVX10_2);
1094     }
1095   }
1096 
1097     // Currently APX support is only enabled for targets supporting AVX512VL feature.
1098   bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1099   if (UseAPX && !apx_supported) {
1100     warning("UseAPX is not supported on this CPU, setting it to false");
1101     FLAG_SET_DEFAULT(UseAPX, false);
1102   }
1103 
1104   if (!UseAPX) {
1105     _features.clear_feature(CPU_APX_F);
1106   }
1107 
1108   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1109     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1110     FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1111   } else {
1112     _has_intel_jcc_erratum = IntelJccErratumMitigation;
1113   }
1114 
1115   assert(supports_clflush(), "Always present");
1116   if (X86ICacheSync == -1) {
1117     // Auto-detect, choosing the best performant one that still flushes
1118     // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1119     if (supports_clwb()) {
1120       FLAG_SET_ERGO(X86ICacheSync, 3);
1121     } else if (supports_clflushopt()) {
1122       FLAG_SET_ERGO(X86ICacheSync, 2);
1123     } else {
1124       FLAG_SET_ERGO(X86ICacheSync, 1);
1125     }
1126   } else {
1127     if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1128       vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1129     }
1130     if ((X86ICacheSync == 3) && !supports_clwb()) {
1131       vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1132     }
1133     if ((X86ICacheSync == 5) && !supports_serialize()) {
1134       vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1135     }
1136   }
1137 
1138   stringStream ss(2048);
1139   if (supports_hybrid()) {
1140     ss.print("(hybrid)");
1141   } else {
1142     ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1143   }
1144   ss.print(" family %d model %d stepping %d microcode 0x%x",
1145            cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1146   ss.print(", ");
1147   int features_offset = (int)ss.size();
1148   insert_features_names(_features, ss);
1149 
1150   _cpu_info_string = ss.as_string(true);
1151   _features_string = _cpu_info_string + features_offset;
1152 
1153   // Use AES instructions if available.
1154   if (supports_aes()) {
1155     if (FLAG_IS_DEFAULT(UseAES)) {
1156       FLAG_SET_DEFAULT(UseAES, true);
1157     }
1158     if (!UseAES) {
1159       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1160         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1161       }
1162       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1163       if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1164         warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1165       }
1166       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1167     } else {
1168       if (UseSSE > 2) {
1169         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1170           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1171         }
1172       } else {
1173         // The AES intrinsic stubs require AES instruction support (of course)
1174         // but also require sse3 mode or higher for instructions it use.
1175         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1176           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1177         }
1178         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1179       }
1180 
1181       // --AES-CTR begins--
1182       if (!UseAESIntrinsics) {
1183         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1184           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1185         }
1186         FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1187       } else {
1188         if (supports_sse4_1()) {
1189           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1190             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1191           }
1192         } else {
1193            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1194            // but also require sse4.1 mode or higher for instructions it use.
1195           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1196              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1197            }
1198            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1199         }
1200       }
1201       // --AES-CTR ends--
1202     }
1203   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1204     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1205       warning("AES instructions are not available on this CPU");
1206     }
1207     FLAG_SET_DEFAULT(UseAES, false);
1208     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1209       warning("AES intrinsics are not available on this CPU");
1210     }
1211     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1212     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1213       warning("AES-CTR intrinsics are not available on this CPU");
1214     }
1215     FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1216   }
1217 
1218   // Use CLMUL instructions if available.
1219   if (supports_clmul()) {
1220     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1221       UseCLMUL = true;
1222     }
1223   } else if (UseCLMUL) {
1224     if (!FLAG_IS_DEFAULT(UseCLMUL))
1225       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1226     FLAG_SET_DEFAULT(UseCLMUL, false);
1227   }
1228 
1229   if (UseCLMUL && (UseSSE > 2)) {
1230     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1231       UseCRC32Intrinsics = true;
1232     }
1233   } else if (UseCRC32Intrinsics) {
1234     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1235       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1236     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1237   }
1238 
1239   if (supports_avx2()) {
1240     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1241       UseAdler32Intrinsics = true;
1242     }
1243   } else if (UseAdler32Intrinsics) {
1244     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1245       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1246     }
1247     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1248   }
1249 
1250   if (supports_sse4_2() && supports_clmul()) {
1251     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1252       UseCRC32CIntrinsics = true;
1253     }
1254   } else if (UseCRC32CIntrinsics) {
1255     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1256       warning("CRC32C intrinsics are not available on this CPU");
1257     }
1258     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1259   }
1260 
1261   // GHASH/GCM intrinsics
1262   if (UseCLMUL && (UseSSE > 2)) {
1263     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1264       UseGHASHIntrinsics = true;
1265     }
1266   } else if (UseGHASHIntrinsics) {
1267     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1268       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1269     }
1270     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1271   }
1272 
1273   // ChaCha20 Intrinsics
1274   // As long as the system supports AVX as a baseline we can do a
1275   // SIMD-enabled block function.  StubGenerator makes the determination
1276   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1277   // version.
1278   if (UseAVX >= 1) {
1279     if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1280       UseChaCha20Intrinsics = true;
1281     }
1282   } else if (UseChaCha20Intrinsics) {
1283     if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1284       warning("ChaCha20 intrinsic requires AVX instructions");
1285     }
1286     FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1287   }
1288 
1289   // Kyber Intrinsics
1290   // Currently we only have them for AVX512
1291   if (supports_evex() && supports_avx512bw()) {
1292     if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1293       UseKyberIntrinsics = true;
1294     }
1295   } else if (UseKyberIntrinsics) {
1296     if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1297       warning("Intrinsics for ML-KEM are not available on this CPU.");
1298     }
1299     FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1300   }
1301 
1302   // Dilithium Intrinsics
1303   if (UseAVX > 1) {
1304       if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1305           UseDilithiumIntrinsics = true;
1306       }
1307   } else if (UseDilithiumIntrinsics) {
1308     if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1309       warning("Intrinsics for ML-DSA are not available on this CPU.");
1310     }
1311     FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1312   }
1313 
1314   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1315   if (UseAVX >= 2) {
1316     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1317       UseBASE64Intrinsics = true;
1318     }
1319   } else if (UseBASE64Intrinsics) {
1320     if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1321       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1322     }
1323     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1324   }
1325 
1326   if (supports_fma()) {
1327     if (FLAG_IS_DEFAULT(UseFMA)) {
1328       UseFMA = true;
1329     }
1330   } else if (UseFMA) {
1331     if (!FLAG_IS_DEFAULT(UseFMA)) {
1332       warning("FMA instructions are not available on this CPU");
1333     }
1334     FLAG_SET_DEFAULT(UseFMA, false);
1335   }
1336 
1337   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1338     UseMD5Intrinsics = true;
1339   }
1340 
1341   if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1342     if (FLAG_IS_DEFAULT(UseSHA)) {
1343       UseSHA = true;
1344     }
1345   } else if (UseSHA) {
1346     if (!FLAG_IS_DEFAULT(UseSHA)) {
1347       warning("SHA instructions are not available on this CPU");
1348     }
1349     FLAG_SET_DEFAULT(UseSHA, false);
1350   }
1351 
1352   if (supports_sha() && supports_sse4_1() && UseSHA) {
1353     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1354       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1355     }
1356   } else if (UseSHA1Intrinsics) {
1357     if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1358       warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1359     }
1360     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1361   }
1362 
1363   if (supports_sse4_1() && UseSHA) {
1364     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1365       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1366     }
1367   } else if (UseSHA256Intrinsics) {
1368     if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1369       warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1370     }
1371     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1372   }
1373 
1374   if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1375     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1376       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1377     }
1378   } else if (UseSHA512Intrinsics) {
1379     if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1380       warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1381     }
1382     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1383   }
1384 
1385   if (UseSHA && supports_evex() && supports_avx512bw()) {
1386     if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1387       FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1388     }
1389   } else if (UseSHA3Intrinsics) {
1390     if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1391       warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1392     }
1393     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1394   }
1395 
1396   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics || UseSHA3Intrinsics)) {
1397     FLAG_SET_DEFAULT(UseSHA, false);
1398   }
1399 
1400 #if COMPILER2_OR_JVMCI
1401   int max_vector_size = 0;
1402   if (UseAVX == 0 || !os_supports_avx_vectors()) {
1403     // 16 byte vectors (in XMM) are supported with SSE2+
1404     max_vector_size = 16;
1405   } else if (UseAVX == 1 || UseAVX == 2) {
1406     // 32 bytes vectors (in YMM) are only supported with AVX+
1407     max_vector_size = 32;
1408   } else if (UseAVX > 2) {
1409     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1410     max_vector_size = 64;
1411   }
1412 
1413   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1414 
1415   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1416     if (MaxVectorSize < min_vector_size) {
1417       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1418       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1419     }
1420     if (MaxVectorSize > max_vector_size) {
1421       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1422       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1423     }
1424     if (!is_power_of_2(MaxVectorSize)) {
1425       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1426       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1427     }
1428   } else {
1429     // If default, use highest supported configuration
1430     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1431   }
1432 
1433 #if defined(COMPILER2) && defined(ASSERT)
1434   if (MaxVectorSize > 0) {
1435     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1436       tty->print_cr("State of YMM registers after signal handle:");
1437       int nreg = 4;
1438       const char* ymm_name[4] = {"0", "7", "8", "15"};
1439       for (int i = 0; i < nreg; i++) {
1440         tty->print("YMM%s:", ymm_name[i]);
1441         for (int j = 7; j >=0; j--) {
1442           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1443         }
1444         tty->cr();
1445       }
1446     }
1447   }
1448 #endif // COMPILER2 && ASSERT
1449 
1450   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1451     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1452       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1453     }
1454   } else if (UsePoly1305Intrinsics) {
1455     if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1456       warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1457     }
1458     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1459   }
1460 
1461   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1462     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1463       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1464     }
1465   } else if (UseIntPolyIntrinsics) {
1466     if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1467       warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1468     }
1469     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1470   }
1471 
1472   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1473     UseMultiplyToLenIntrinsic = true;
1474   }
1475   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1476     UseSquareToLenIntrinsic = true;
1477   }
1478   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1479     UseMulAddIntrinsic = true;
1480   }
1481   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1482     UseMontgomeryMultiplyIntrinsic = true;
1483   }
1484   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1485     UseMontgomerySquareIntrinsic = true;
1486   }
1487 #endif // COMPILER2_OR_JVMCI
1488 
1489   // On new cpus instructions which update whole XMM register should be used
1490   // to prevent partial register stall due to dependencies on high half.
1491   //
1492   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1493   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1494   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1495   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1496 
1497 
1498   if (is_zx()) { // ZX cpus specific settings
1499     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1500       UseStoreImmI16 = false; // don't use it on ZX cpus
1501     }
1502     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1503       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1504         // Use it on all ZX cpus
1505         UseAddressNop = true;
1506       }
1507     }
1508     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1509       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1510     }
1511     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1512       if (supports_sse3()) {
1513         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1514       } else {
1515         UseXmmRegToRegMoveAll = false;
1516       }
1517     }
1518     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1519 #ifdef COMPILER2
1520       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1521         // For new ZX cpus do the next optimization:
1522         // don't align the beginning of a loop if there are enough instructions
1523         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1524         // in current fetch line (OptoLoopAlignment) or the padding
1525         // is big (> MaxLoopPad).
1526         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1527         // generated NOP instructions. 11 is the largest size of one
1528         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1529         MaxLoopPad = 11;
1530       }
1531 #endif // COMPILER2
1532       if (supports_sse4_2()) { // new ZX cpus
1533         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1534           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1535         }
1536       }
1537     }
1538 
1539     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1540       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1541     }
1542   }
1543 
1544   if (is_amd_family()) { // AMD cpus specific settings
1545     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1546       // Use it on new AMD cpus starting from Opteron.
1547       UseAddressNop = true;
1548     }
1549     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1550       if (supports_sse4a()) {
1551         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1552       } else {
1553         UseXmmLoadAndClearUpper = false;
1554       }
1555     }
1556     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1557       if (supports_sse4a()) {
1558         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1559       } else {
1560         UseXmmRegToRegMoveAll = false;
1561       }
1562     }
1563     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1564       if (supports_sse4a()) {
1565         UseXmmI2F = true;
1566       } else {
1567         UseXmmI2F = false;
1568       }
1569     }
1570     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1571       if (supports_sse4a()) {
1572         UseXmmI2D = true;
1573       } else {
1574         UseXmmI2D = false;
1575       }
1576     }
1577 
1578     // some defaults for AMD family 15h
1579     if (cpu_family() == 0x15) {
1580       // On family 15h processors default is no sw prefetch
1581       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1582         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1583       }
1584       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1585       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1586         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1587       }
1588       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1589         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1590       }
1591     }
1592 
1593 #ifdef COMPILER2
1594     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1595       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1596       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1597     }
1598 #endif // COMPILER2
1599 
1600     // Some defaults for AMD family >= 17h && Hygon family 18h
1601     if (cpu_family() >= 0x17) {
1602       // On family >=17h processors use XMM and UnalignedLoadStores
1603       // for Array Copy
1604       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1605         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1606       }
1607 #ifdef COMPILER2
1608       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1609         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1610       }
1611 #endif
1612     }
1613   }
1614 
1615   if (is_intel()) { // Intel cpus specific settings
1616     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1617       UseStoreImmI16 = false; // don't use it on Intel cpus
1618     }
1619     if (is_intel_server_family() || cpu_family() == 15) {
1620       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1621         // Use it on all Intel cpus starting from PentiumPro
1622         UseAddressNop = true;
1623       }
1624     }
1625     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1626       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1627     }
1628     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1629       if (supports_sse3()) {
1630         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1631       } else {
1632         UseXmmRegToRegMoveAll = false;
1633       }
1634     }
1635     if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1636 #ifdef COMPILER2
1637       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1638         // For new Intel cpus do the next optimization:
1639         // don't align the beginning of a loop if there are enough instructions
1640         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1641         // in current fetch line (OptoLoopAlignment) or the padding
1642         // is big (> MaxLoopPad).
1643         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1644         // generated NOP instructions. 11 is the largest size of one
1645         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1646         MaxLoopPad = 11;
1647       }
1648 #endif // COMPILER2
1649 
1650       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1651         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1652           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1653         }
1654       }
1655     }
1656     if (is_atom_family() || is_knights_family()) {
1657 #ifdef COMPILER2
1658       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1659         OptoScheduling = true;
1660       }
1661 #endif
1662       if (supports_sse4_2()) { // Silvermont
1663         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1664           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1665         }
1666       }
1667       if (FLAG_IS_DEFAULT(UseIncDec)) {
1668         FLAG_SET_DEFAULT(UseIncDec, false);
1669       }
1670     }
1671     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1672       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1673     }
1674   }
1675 
1676 #ifdef COMPILER2
1677   if (UseAVX > 2) {
1678     if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1679         (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1680          ArrayOperationPartialInlineSize != 0 &&
1681          ArrayOperationPartialInlineSize != 16 &&
1682          ArrayOperationPartialInlineSize != 32 &&
1683          ArrayOperationPartialInlineSize != 64)) {
1684       int inline_size = 0;
1685       if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1686         inline_size = 64;
1687       } else if (MaxVectorSize >= 32) {
1688         inline_size = 32;
1689       } else if (MaxVectorSize >= 16) {
1690         inline_size = 16;
1691       }
1692       if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1693         warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1694       }
1695       ArrayOperationPartialInlineSize = inline_size;
1696     }
1697 
1698     if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1699       ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1700       if (ArrayOperationPartialInlineSize) {
1701         warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1702       } else {
1703         warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1704       }
1705     }
1706   }
1707 
1708   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1709     if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1710       OptimizeFill = false;
1711     }
1712   }
1713 #endif
1714   if (supports_sse4_2()) {
1715     if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1716       FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1717     }
1718   } else if (UseSSE42Intrinsics) {
1719     if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1720       warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1721     }
1722     FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1723   }
1724   if (UseSSE42Intrinsics) {
1725     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1726       UseVectorizedMismatchIntrinsic = true;
1727     }
1728   } else if (UseVectorizedMismatchIntrinsic) {
1729     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1730       warning("vectorizedMismatch intrinsics are not available on this CPU");
1731     }
1732     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1733   }
1734   if (UseAVX >= 2) {
1735     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1736   } else if (UseVectorizedHashCodeIntrinsic) {
1737     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1738       warning("vectorizedHashCode intrinsics are not available on this CPU");
1739     }
1740     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1741   }
1742 
1743   // Use count leading zeros count instruction if available.
1744   if (supports_lzcnt()) {
1745     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1746       UseCountLeadingZerosInstruction = true;
1747     }
1748    } else if (UseCountLeadingZerosInstruction) {
1749     if (!FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1750       warning("lzcnt instruction is not available on this CPU");
1751     }
1752     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1753   }
1754 
1755   // Use count trailing zeros instruction if available
1756   if (supports_bmi1()) {
1757     // tzcnt does not require VEX prefix
1758     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1759       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1760         // Don't use tzcnt if BMI1 is switched off on command line.
1761         UseCountTrailingZerosInstruction = false;
1762       } else {
1763         UseCountTrailingZerosInstruction = true;
1764       }
1765     }
1766   } else if (UseCountTrailingZerosInstruction) {
1767     if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1768       warning("tzcnt instruction is not available on this CPU");
1769     }
1770     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1771   }
1772 
1773   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1774   // VEX prefix is generated only when AVX > 0.
1775   if (supports_bmi1() && supports_avx()) {
1776     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1777       UseBMI1Instructions = true;
1778     }
1779   } else if (UseBMI1Instructions) {
1780     if (!FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1781       warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1782     }
1783     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1784   }
1785 
1786   if (supports_bmi2() && supports_avx()) {
1787     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1788       UseBMI2Instructions = true;
1789     }
1790   } else if (UseBMI2Instructions) {
1791     if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1792       warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1793     }
1794     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1795   }
1796 
1797   // Use population count instruction if available.
1798   if (supports_popcnt()) {
1799     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1800       UsePopCountInstruction = true;
1801     }
1802   } else if (UsePopCountInstruction) {
1803     if (!FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1804       warning("POPCNT instruction is not available on this CPU");
1805     }
1806     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1807   }
1808 
1809   // Use fast-string operations if available.
1810   if (supports_erms()) {
1811     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1812       UseFastStosb = true;
1813     }
1814   } else if (UseFastStosb) {
1815     if (!FLAG_IS_DEFAULT(UseFastStosb)) {
1816       warning("fast-string operations are not available on this CPU");
1817     }
1818     FLAG_SET_DEFAULT(UseFastStosb, false);
1819   }
1820 
1821   // For AMD Processors use XMM/YMM MOVDQU instructions
1822   // for Object Initialization as default
1823   if (is_amd() && cpu_family() >= 0x19) {
1824     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1825       UseFastStosb = false;
1826     }
1827   }
1828 
1829 #ifdef COMPILER2
1830   if (is_intel() && MaxVectorSize > 16) {
1831     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1832       UseFastStosb = false;
1833     }
1834   }
1835 #endif
1836 
1837   // Use XMM/YMM MOVDQU instruction for Object Initialization
1838   if (!UseFastStosb && UseUnalignedLoadStores) {
1839     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1840       UseXMMForObjInit = true;
1841     }
1842   } else if (UseXMMForObjInit) {
1843     if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1844       warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1845     }
1846     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1847   }
1848 
1849 #ifdef COMPILER2
1850   if (FLAG_IS_DEFAULT(AlignVector)) {
1851     // Modern processors allow misaligned memory operations for vectors.
1852     AlignVector = !UseUnalignedLoadStores;
1853   }
1854 #endif // COMPILER2
1855 
1856   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1857     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1858       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1859     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1860       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1861     }
1862   }
1863 
1864   // Allocation prefetch settings
1865   int cache_line_size = checked_cast<int>(prefetch_data_size());
1866   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1867       (cache_line_size > AllocatePrefetchStepSize)) {
1868     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1869   }
1870 
1871   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1872     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1873     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1874       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1875     }
1876     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1877   }
1878 
1879   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1880     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1881     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1882   }
1883 
1884   if (is_intel() && is_intel_server_family() && supports_sse3()) {
1885     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1886         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1887       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1888     }
1889 #ifdef COMPILER2
1890     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1891       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1892     }
1893 #endif
1894   }
1895 
1896   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1897 #ifdef COMPILER2
1898     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1899       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1900     }
1901 #endif
1902   }
1903 
1904   // Prefetch settings
1905 
1906   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1907   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1908   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1909   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1910 
1911   // gc copy/scan is disabled if prefetchw isn't supported, because
1912   // Prefetch::write emits an inlined prefetchw on Linux.
1913   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1914   // The used prefetcht0 instruction works for both amd64 and em64t.
1915 
1916   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1917     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1918   }
1919   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1920     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1921   }
1922 
1923   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1924      (cache_line_size > ContendedPaddingWidth))
1925     ContendedPaddingWidth = cache_line_size;
1926 
1927   // This machine allows unaligned memory accesses
1928   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1929     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1930   }
1931 
1932 #ifndef PRODUCT
1933   if (log_is_enabled(Info, os, cpu)) {
1934     LogStream ls(Log(os, cpu)::info());
1935     outputStream* log = &ls;
1936     log->print_cr("Logical CPUs per core: %u",
1937                   logical_processors_per_package());
1938     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1939     log->print("UseSSE=%d", UseSSE);
1940     if (UseAVX > 0) {
1941       log->print("  UseAVX=%d", UseAVX);
1942     }
1943     if (UseAES) {
1944       log->print("  UseAES=1");
1945     }
1946 #ifdef COMPILER2
1947     if (MaxVectorSize > 0) {
1948       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1949     }
1950 #endif
1951     log->cr();
1952     log->print("Allocation");
1953     if (AllocatePrefetchStyle <= 0) {
1954       log->print_cr(": no prefetching");
1955     } else {
1956       log->print(" prefetching: ");
1957       if (AllocatePrefetchInstr == 0) {
1958         log->print("PREFETCHNTA");
1959       } else if (AllocatePrefetchInstr == 1) {
1960         log->print("PREFETCHT0");
1961       } else if (AllocatePrefetchInstr == 2) {
1962         log->print("PREFETCHT2");
1963       } else if (AllocatePrefetchInstr == 3) {
1964         log->print("PREFETCHW");
1965       }
1966       if (AllocatePrefetchLines > 1) {
1967         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1968       } else {
1969         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1970       }
1971     }
1972 
1973     if (PrefetchCopyIntervalInBytes > 0) {
1974       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1975     }
1976     if (PrefetchScanIntervalInBytes > 0) {
1977       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1978     }
1979     if (ContendedPaddingWidth > 0) {
1980       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1981     }
1982   }
1983 #endif // !PRODUCT
1984   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1985       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1986   }
1987   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1988       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1989   }
1990   // CopyAVX3Threshold is the threshold at which 64-byte instructions are used
1991   // for implementing the array copy and clear operations.
1992   // The Intel platforms that supports the serialize instruction
1993   // have improved implementation of 64-byte load/stores and so the default
1994   // threshold is set to 0 for these platforms.
1995   if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
1996     if (is_intel() && is_intel_server_family() && supports_serialize()) {
1997       FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1998     } else {
1999       FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
2000     }
2001   }
2002 }
2003 
2004 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2005   VirtualizationType vrt = VM_Version::get_detected_virtualization();
2006   if (vrt == XenHVM) {
2007     st->print_cr("Xen hardware-assisted virtualization detected");
2008   } else if (vrt == KVM) {
2009     st->print_cr("KVM virtualization detected");
2010   } else if (vrt == VMWare) {
2011     st->print_cr("VMWare virtualization detected");
2012     VirtualizationSupport::print_virtualization_info(st);
2013   } else if (vrt == HyperV) {
2014     st->print_cr("Hyper-V virtualization detected");
2015   } else if (vrt == HyperVRole) {
2016     st->print_cr("Hyper-V role detected");
2017   }
2018 }
2019 
2020 bool VM_Version::compute_has_intel_jcc_erratum() {
2021   if (!is_intel_family_core()) {
2022     // Only Intel CPUs are affected.
2023     return false;
2024   }
2025   // The following table of affected CPUs is based on the following document released by Intel:
2026   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
2027   switch (_model) {
2028   case 0x8E:
2029     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2030     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
2031     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
2032     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
2033     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
2034     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2035     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2036     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
2037     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2038     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
2039   case 0x4E:
2040     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
2041     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
2042     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
2043     return _stepping == 0x3;
2044   case 0x55:
2045     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2046     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2047     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2048     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2049     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2050     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2051     return _stepping == 0x4 || _stepping == 0x7;
2052   case 0x5E:
2053     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2054     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2055     return _stepping == 0x3;
2056   case 0x9E:
2057     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2058     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2059     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2060     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2061     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2062     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2063     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2064     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2065     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2066     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2067     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2068     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2069     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2070     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2071     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2072   case 0xA5:
2073     // Not in Intel documentation.
2074     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2075     return true;
2076   case 0xA6:
2077     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2078     return _stepping == 0x0;
2079   case 0xAE:
2080     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2081     return _stepping == 0xA;
2082   default:
2083     // If we are running on another intel machine not recognized in the table, we are okay.
2084     return false;
2085   }
2086 }
2087 
2088 // On Xen, the cpuid instruction returns
2089 //  eax / registers[0]: Version of Xen
2090 //  ebx / registers[1]: chars 'XenV'
2091 //  ecx / registers[2]: chars 'MMXe'
2092 //  edx / registers[3]: chars 'nVMM'
2093 //
2094 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2095 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2096 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2097 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2098 //
// More information:
// https://kb.vmware.com/s/article/1009458
2101 //
2102 void VM_Version::check_virtualizations() {
2103   uint32_t registers[4] = {0};
2104   char signature[13] = {0};
2105 
2106   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2107   // from 0x40000000 until 0x40010000.
2108   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2109   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2110     detect_virt_stub(leaf, registers);
2111     memcpy(signature, &registers[1], 12);
2112 
2113     if (strncmp("VMwareVMware", signature, 12) == 0) {
2114       Abstract_VM_Version::_detected_virtualization = VMWare;
2115       // check for extended metrics from guestlib
2116       VirtualizationSupport::initialize();
2117     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2118       Abstract_VM_Version::_detected_virtualization = HyperV;
2119 #ifdef _WINDOWS
2120       // CPUID leaf 0x40000007 is available to the root partition only.
2121       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2122       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2123       detect_virt_stub(0x40000007, registers);
2124       if ((registers[0] != 0x0) ||
2125           (registers[1] != 0x0) ||
2126           (registers[2] != 0x0) ||
2127           (registers[3] != 0x0)) {
2128         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2129       }
2130 #endif
2131     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2132       Abstract_VM_Version::_detected_virtualization = KVM;
2133     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2134       Abstract_VM_Version::_detected_virtualization = XenHVM;
2135     }
2136   }
2137 }
2138 
#ifdef COMPILER2
// Determine if it's running on Cascade Lake with default options, i.e. the
// user has not overridden UseAVX or MaxVectorSize on the command line.
bool VM_Version::is_default_intel_cascade_lake() {
  if (!is_intel_cascade_lake()) {
    return false;
  }
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2;
}
#endif
2148 
2149 bool VM_Version::is_intel_cascade_lake() {
2150   return is_intel_skylake() && _stepping >= 5;
2151 }
2152 
2153 bool VM_Version::is_intel_darkmont() {
2154   return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2155 }
2156 
// Clear the CPU state left behind by the APX probing code, by delegating to
// the generated assembly stub (installed in VM_Version::initialize()).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2160 
// Set to true at the very end of VM_Version::initialize().
static bool _vm_version_initialized = false;
2162 
2163 void VM_Version::initialize() {
2164   ResourceMark rm;
2165 
2166   // Making this stub must be FIRST use of assembler
2167   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2168   if (stub_blob == nullptr) {
2169     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2170   }
2171   CodeBuffer c(stub_blob);
2172   VM_Version_StubGenerator g(&c);
2173 
2174   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2175                                      g.generate_get_cpu_info());
2176   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2177                                      g.generate_detect_virt());
2178   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2179                                      g.clear_apx_test_state());
2180   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2181                                      g.generate_getCPUIDBrandString());
2182   get_processor_features();
2183 
2184   Assembler::precompute_instructions();
2185 
2186   if (VM_Version::supports_hv()) { // Supports hypervisor
2187     check_virtualizations();
2188   }
2189   _vm_version_initialized = true;
2190 }
2191 
// Legacy CPU family numbers, used to map a raw family id to a name below.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
2201 
// Single-bit masks for the extended-feature edx word; the bit positions
// match the corresponding entries in _feature_extended_edx_id below.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2206 
// Single-bit masks for the standard feature edx word (bit 0 = FPU ...
// bit 29 = TM); positions mirror the name table _feature_edx_id below.
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2237 
// VM_Version statics
// Lengths of the family-id name tables defined below.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};
2243 
// Buffer sizes for cpuid-derived strings: a 12-character vendor id plus NUL,
// and the extended brand string (3 cpuid leaves x 4 registers x 4 bytes + NUL).
const size_t VENDOR_LENGTH = 13;
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Caches, nullptr/0 until computed — presumably filled in lazily by the
// brand-string and frequency accessors; confirm against the rest of the file.
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

// Topology counts; 0 until populated.
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2251 
// Intel family id (0x0..0xF) -> family name; "" = no name for that id.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",     // 0x0
  "",              // 0x1
  "286",           // 0x2
  "386",           // 0x3
  "486",           // 0x4
  "Pentium",       // 0x5
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",              // 0x7
  "",              // 0x8
  "",              // 0x9
  "",              // 0xA
  "",              // 0xB
  "",              // 0xC
  "",              // 0xD
  "",              // 0xE
  "Pentium 4"      // 0xF
};
2270 
// AMD family id (0x00..0x17) -> family name; "" = no name for that id.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",                   // 0x00
  "",                   // 0x01
  "",                   // 0x02
  "",                   // 0x03
  "5x86",               // 0x04
  "K5/K6",              // 0x05
  "Athlon/AthlonXP",    // 0x06
  "",                   // 0x07
  "",                   // 0x08
  "",                   // 0x09
  "",                   // 0x0A
  "",                   // 0x0B
  "",                   // 0x0C
  "",                   // 0x0D
  "",                   // 0x0E
  "Opteron/Athlon64",   // 0x0F
  "Opteron QC/Phenom",  // 0x10 Barcelona et.al.
  "",                   // 0x11
  "",                   // 0x12
  "",                   // 0x13
  "",                   // 0x14
  "",                   // 0x15
  "",                   // 0x16
  "Zen"                 // 0x17
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Maps a Pentium Pro-family (family 6) model number to a name; indexed by
// model, "" = no listed name, nullptr-terminated.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",          // 0x01
  "",
  "Pentium II model 3",   // 0x03
  "",
  "Pentium II model 5/Xeon/Celeron",  // 0x05
  "Celeron",              // 0x06
  "Pentium III/Pentium III Xeon",     // 0x07
  "Pentium III/Pentium III Xeon",     // 0x08
  "Pentium M model 9",    // Yonah
  "Pentium III, model A", // 0x0A
  "Pentium III, model B", // 0x0B
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr                 // terminator
};
2373 
/* Brand ID is for back compatibility;
 * newer CPUs use the extended brand string instead.
 * Indexed by brand id; "" = no name, nullptr-terminated. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr   // terminator
};
2388 
2389 
// Human-readable names for the standard feature edx bits, indexed by bit
// position (cf. FeatureEdxFlag above); "" = unnamed/reserved bit.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",                           // bit 0
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",                      // bit 11
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",                   // bit 19
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",             // bit 25
  "Streaming SIMD extensions 2",           // bit 26
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",                       // bit 29
  "",
  "Pending Break Enable"
};
2424 
// Feature names for extended CPUID leaf (0x80000001) EDX, indexed by bit
// position. Paired with _cpuid_info.ext_cpuid1_edx in
// cpu_write_support_string(); empty entries are never printed.
const char* const _feature_extended_edx_id[] = {
  "",                       // bit 0
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",         // bit 11
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",    // bit 20
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",                 // bit 27
  "",
  "Intel 64 Architecture",  // bit 29
  "",
  ""
};
2459 
// Feature names for CPUID leaf 1 ECX, indexed by bit position.
// Paired with _cpuid_info.std_cpuid1_ecx in cpu_write_support_string();
// empty entries are never printed.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",                // bit 0
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",                          // bit 8
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",                                           // bit 16
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",                               // bit 24
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",                                     // bit 30
  ""
};
2494 
// Feature names for extended CPUID leaf (0x80000001) ECX, indexed by bit
// position. Paired with _cpuid_info.ext_cpuid1_ecx in
// cpu_write_support_string(); empty entries are never printed.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",              // bit 0
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",          // bit 5
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",    // bit 6
  "Misaligned SSE mode",                        // bit 7
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2529 
2530 const char* VM_Version::cpu_model_description(void) {
2531   uint32_t cpu_family = extended_cpu_family();
2532   uint32_t cpu_model = extended_cpu_model();
2533   const char* model = nullptr;
2534 
2535   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2536     for (uint32_t i = 0; i <= cpu_model; i++) {
2537       model = _model_id_pentium_pro[i];
2538       if (model == nullptr) {
2539         break;
2540       }
2541     }
2542   }
2543   return model;
2544 }
2545 
2546 const char* VM_Version::cpu_brand_string(void) {
2547   if (_cpu_brand_string == nullptr) {
2548     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2549     if (nullptr == _cpu_brand_string) {
2550       return nullptr;
2551     }
2552     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2553     if (ret_val != OS_OK) {
2554       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2555       _cpu_brand_string = nullptr;
2556     }
2557   }
2558   return _cpu_brand_string;
2559 }
2560 
2561 const char* VM_Version::cpu_brand(void) {
2562   const char*  brand  = nullptr;
2563 
2564   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2565     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2566     brand = _brand_id[0];
2567     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2568       brand = _brand_id[i];
2569     }
2570   }
2571   return brand;
2572 }
2573 
2574 bool VM_Version::cpu_is_em64t(void) {
2575   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2576 }
2577 
2578 bool VM_Version::is_netburst(void) {
2579   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2580 }
2581 
2582 bool VM_Version::supports_tscinv_ext(void) {
2583   if (!supports_tscinv_bit()) {
2584     return false;
2585   }
2586 
2587   if (is_intel()) {
2588     return true;
2589   }
2590 
2591   if (is_amd()) {
2592     return !is_amd_Barcelona();
2593   }
2594 
2595   if (is_hygon()) {
2596     return true;
2597   }
2598 
2599   return false;
2600 }
2601 
2602 void VM_Version::resolve_cpu_information_details(void) {
2603 
2604   // in future we want to base this information on proper cpu
2605   // and cache topology enumeration such as:
2606   // Intel 64 Architecture Processor Topology Enumeration
2607   // which supports system cpu and cache topology enumeration
2608   // either using 2xAPICIDs or initial APICIDs
2609 
2610   // currently only rough cpu information estimates
2611   // which will not necessarily reflect the exact configuration of the system
2612 
2613   // this is the number of logical hardware threads
2614   // visible to the operating system
2615   _no_of_threads = os::processor_count();
2616 
2617   // find out number of threads per cpu package
2618   int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2619   if (threads_per_package == 0) {
2620     // Fallback code to avoid div by zero in subsequent code.
2621     // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2622     threads_per_package = threads_per_core() * cores_per_cpu();
2623   }
2624 
2625   // use amount of threads visible to the process in order to guess number of sockets
2626   _no_of_sockets = _no_of_threads / threads_per_package;
2627 
2628   // process might only see a subset of the total number of threads
2629   // from a single processor package. Virtualization/resource management for example.
2630   // If so then just write a hard 1 as num of pkgs.
2631   if (0 == _no_of_sockets) {
2632     _no_of_sockets = 1;
2633   }
2634 
2635   // estimate the number of cores
2636   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2637 }
2638 
2639 
2640 const char* VM_Version::cpu_family_description(void) {
2641   int cpu_family_id = extended_cpu_family();
2642   if (is_amd()) {
2643     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2644       return _family_id_amd[cpu_family_id];
2645     }
2646   }
2647   if (is_intel()) {
2648     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2649       return cpu_model_description();
2650     }
2651     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2652       return _family_id_intel[cpu_family_id];
2653     }
2654   }
2655   if (is_zx()) {
2656     int cpu_model_id = extended_cpu_model();
2657     if (cpu_family_id == 7) {
2658       switch (cpu_model_id) {
2659         case 0x1B:
2660           return "wudaokou";
2661         case 0x3B:
2662           return "lujiazui";
2663         case 0x5B:
2664           return "yongfeng";
2665         case 0x6B:
2666           return "shijidadao";
2667       }
2668     } else if (cpu_family_id == 6) {
2669       return "zhangjiang";
2670     }
2671   }
2672   if (is_hygon()) {
2673     return "Dhyana";
2674   }
2675   return "Unknown x86";
2676 }
2677 
2678 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2679   assert(buf != nullptr, "buffer is null!");
2680   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2681 
2682   const char* cpu_type = nullptr;
2683   const char* x64 = nullptr;
2684 
2685   if (is_intel()) {
2686     cpu_type = "Intel";
2687     x64 = cpu_is_em64t() ? " Intel64" : "";
2688   } else if (is_amd()) {
2689     cpu_type = "AMD";
2690     x64 = cpu_is_em64t() ? " AMD64" : "";
2691   } else if (is_zx()) {
2692     cpu_type = "Zhaoxin";
2693     x64 = cpu_is_em64t() ? " x86_64" : "";
2694   } else if (is_hygon()) {
2695     cpu_type = "Hygon";
2696     x64 = cpu_is_em64t() ? " AMD64" : "";
2697   } else {
2698     cpu_type = "Unknown x86";
2699     x64 = cpu_is_em64t() ? " x86_64" : "";
2700   }
2701 
2702   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2703     cpu_type,
2704     cpu_family_description(),
2705     supports_ht() ? " (HT)" : "",
2706     supports_sse3() ? " SSE3" : "",
2707     supports_ssse3() ? " SSSE3" : "",
2708     supports_sse4_1() ? " SSE4.1" : "",
2709     supports_sse4_2() ? " SSE4.2" : "",
2710     supports_sse4a() ? " SSE4A" : "",
2711     is_netburst() ? " Netburst" : "",
2712     is_intel_family_core() ? " Core" : "",
2713     x64);
2714 
2715   return OS_OK;
2716 }
2717 
2718 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2719   assert(buf != nullptr, "buffer is null!");
2720   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2721   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2722 
2723   // invoke newly generated asm code to fetch CPU Brand String
2724   getCPUIDBrandString_stub(&_cpuid_info);
2725 
2726   // fetch results into buffer
2727   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2728   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2729   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2730   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2731   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2732   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2733   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2734   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2735   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2736   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2737   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2738   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2739 
2740   return OS_OK;
2741 }
2742 
// Write a comma-separated list of supported CPU feature names into buf and
// return the number of bytes written. The names come from the four
// _feature_*_id tables above, one per CPUID feature register, indexed by
// bit position; unnamed (empty-string) bits are skipped.
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;      // current single-bit mask under test
  unsigned int fi = 0;        // bit position == index into the name table
  size_t       written = 0;   // bytes emitted into buf so far
  const char*  prefix = "";   // separator; becomes ", " after the first item

// Append 'string' (preceded by the current separator) to buf, advancing
// 'written'. On a formatting error the enclosing function returns,
// reporting the buffer as full.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // CPUID leaf 1 EDX features (bits 0-29 only).
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID leaf 1 ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Extended CPUID leaf ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Extended CPUID leaf EDX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Features not expressible through the tables above.
  if (supports_tscinv_bit()) {
      WRITE_TO_BUF("Invariant TSC");
  }

  if (supports_hybrid()) {
      WRITE_TO_BUF("Hybrid Architecture");
  }

  return written;
}
2803 
2804 /**
2805  * Write a detailed description of the cpu to a given buffer, including
2806  * feature set.
2807  */
2808 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2809   assert(buf != nullptr, "buffer is null!");
2810   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2811 
2812   static const char* unknown = "<unknown>";
2813   char               vendor_id[VENDOR_LENGTH];
2814   const char*        family = nullptr;
2815   const char*        model = nullptr;
2816   const char*        brand = nullptr;
2817   int                outputLen = 0;
2818 
2819   family = cpu_family_description();
2820   if (family == nullptr) {
2821     family = unknown;
2822   }
2823 
2824   model = cpu_model_description();
2825   if (model == nullptr) {
2826     model = unknown;
2827   }
2828 
2829   brand = cpu_brand_string();
2830 
2831   if (brand == nullptr) {
2832     brand = cpu_brand();
2833     if (brand == nullptr) {
2834       brand = unknown;
2835     }
2836   }
2837 
2838   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2839   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2840   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2841   vendor_id[VENDOR_LENGTH-1] = '\0';
2842 
2843   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2844     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2845     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2846     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2847     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2848     "Supports: ",
2849     brand,
2850     vendor_id,
2851     family,
2852     extended_cpu_family(),
2853     model,
2854     extended_cpu_model(),
2855     cpu_stepping(),
2856     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2857     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2858     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2859     _cpuid_info.std_cpuid1_eax.value,
2860     _cpuid_info.std_cpuid1_ebx.value,
2861     _cpuid_info.std_cpuid1_ecx.value,
2862     _cpuid_info.std_cpuid1_edx.value,
2863     _cpuid_info.ext_cpuid1_eax,
2864     _cpuid_info.ext_cpuid1_ebx,
2865     _cpuid_info.ext_cpuid1_ecx,
2866     _cpuid_info.ext_cpuid1_edx);
2867 
2868   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2869     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2870     return OS_ERR;
2871   }
2872 
2873   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2874 
2875   return OS_OK;
2876 }
2877 
2878 
// Fill in Abstract_VM_Version statics: topology estimates plus the cached
// cpu name and detailed description strings. Must run exactly once, after
// VM_Version itself is initialized.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  // Estimate socket/core/thread counts before producing the descriptions.
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2890 
2891 /**
2892  *  For information about extracting the frequency from the cpu brand string, please see:
2893  *
2894  *    Intel Processor Identification and the CPUID Instruction
2895  *    Application Note 485
2896  *    May 2012
2897  *
2898  * The return value is the frequency in Hz.
2899  */
2900 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2901   const char* const brand_string = cpu_brand_string();
2902   if (brand_string == nullptr) {
2903     return 0;
2904   }
2905   const int64_t MEGA = 1000000;
2906   int64_t multiplier = 0;
2907   int64_t frequency = 0;
2908   uint8_t idx = 0;
2909   // The brand string buffer is at most 48 bytes.
2910   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2911   for (; idx < 48-2; ++idx) {
2912     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2913     // Search brand string for "yHz" where y is M, G, or T.
2914     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2915       if (brand_string[idx] == 'M') {
2916         multiplier = MEGA;
2917       } else if (brand_string[idx] == 'G') {
2918         multiplier = MEGA * 1000;
2919       } else if (brand_string[idx] == 'T') {
2920         multiplier = MEGA * MEGA;
2921       }
2922       break;
2923     }
2924   }
2925   if (multiplier > 0) {
2926     // Compute frequency (in Hz) from brand string.
2927     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2928       frequency =  (brand_string[idx-4] - '0') * multiplier;
2929       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2930       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2931     } else { // format is "xxxx"
2932       frequency =  (brand_string[idx-4] - '0') * 1000;
2933       frequency += (brand_string[idx-3] - '0') * 100;
2934       frequency += (brand_string[idx-2] - '0') * 10;
2935       frequency += (brand_string[idx-1] - '0');
2936       frequency *= multiplier;
2937     }
2938   }
2939   return frequency;
2940 }
2941 
2942 
2943 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2944   if (_max_qualified_cpu_frequency == 0) {
2945     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2946   }
2947   return _max_qualified_cpu_frequency;
2948 }
2949 
2950 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2951   VM_Features vm_features;
2952   if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2953     vm_features.set_feature(CPU_CX8);
2954   if (std_cpuid1_edx.bits.cmov != 0)
2955     vm_features.set_feature(CPU_CMOV);
2956   if (std_cpuid1_edx.bits.clflush != 0)
2957     vm_features.set_feature(CPU_FLUSH);
2958   // clflush should always be available on x86_64
2959   // if not we are in real trouble because we rely on it
2960   // to flush the code cache.
2961   assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2962   if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2963       ext_cpuid1_edx.bits.fxsr != 0))
2964     vm_features.set_feature(CPU_FXSR);
2965   // HT flag is set for multi-core processors also.
2966   if (threads_per_core() > 1)
2967     vm_features.set_feature(CPU_HT);
2968   if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2969       ext_cpuid1_edx.bits.mmx != 0))
2970     vm_features.set_feature(CPU_MMX);
2971   if (std_cpuid1_edx.bits.sse != 0)
2972     vm_features.set_feature(CPU_SSE);
2973   if (std_cpuid1_edx.bits.sse2 != 0)
2974     vm_features.set_feature(CPU_SSE2);
2975   if (std_cpuid1_ecx.bits.sse3 != 0)
2976     vm_features.set_feature(CPU_SSE3);
2977   if (std_cpuid1_ecx.bits.ssse3 != 0)
2978     vm_features.set_feature(CPU_SSSE3);
2979   if (std_cpuid1_ecx.bits.sse4_1 != 0)
2980     vm_features.set_feature(CPU_SSE4_1);
2981   if (std_cpuid1_ecx.bits.sse4_2 != 0)
2982     vm_features.set_feature(CPU_SSE4_2);
2983   if (std_cpuid1_ecx.bits.popcnt != 0)
2984     vm_features.set_feature(CPU_POPCNT);
2985   if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2986       xem_xcr0_eax.bits.apx_f != 0 &&
2987       std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2988     vm_features.set_feature(CPU_APX_F);
2989   }
2990   if (std_cpuid1_ecx.bits.avx != 0 &&
2991       std_cpuid1_ecx.bits.osxsave != 0 &&
2992       xem_xcr0_eax.bits.sse != 0 &&
2993       xem_xcr0_eax.bits.ymm != 0) {
2994     vm_features.set_feature(CPU_AVX);
2995     vm_features.set_feature(CPU_VZEROUPPER);
2996     if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2997       vm_features.set_feature(CPU_SHA512);
2998     if (std_cpuid1_ecx.bits.f16c != 0)
2999       vm_features.set_feature(CPU_F16C);
3000     if (sef_cpuid7_ebx.bits.avx2 != 0) {
3001       vm_features.set_feature(CPU_AVX2);
3002       if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
3003         vm_features.set_feature(CPU_AVX_IFMA);
3004     }
3005     if (sef_cpuid7_ecx.bits.gfni != 0)
3006         vm_features.set_feature(CPU_GFNI);
3007     if (sef_cpuid7_ebx.bits.avx512f != 0 &&
3008         xem_xcr0_eax.bits.opmask != 0 &&
3009         xem_xcr0_eax.bits.zmm512 != 0 &&
3010         xem_xcr0_eax.bits.zmm32 != 0) {
3011       vm_features.set_feature(CPU_AVX512F);
3012       if (sef_cpuid7_ebx.bits.avx512cd != 0)
3013         vm_features.set_feature(CPU_AVX512CD);
3014       if (sef_cpuid7_ebx.bits.avx512dq != 0)
3015         vm_features.set_feature(CPU_AVX512DQ);
3016       if (sef_cpuid7_ebx.bits.avx512ifma != 0)
3017         vm_features.set_feature(CPU_AVX512_IFMA);
3018       if (sef_cpuid7_ebx.bits.avx512pf != 0)
3019         vm_features.set_feature(CPU_AVX512PF);
3020       if (sef_cpuid7_ebx.bits.avx512er != 0)
3021         vm_features.set_feature(CPU_AVX512ER);
3022       if (sef_cpuid7_ebx.bits.avx512bw != 0)
3023         vm_features.set_feature(CPU_AVX512BW);
3024       if (sef_cpuid7_ebx.bits.avx512vl != 0)
3025         vm_features.set_feature(CPU_AVX512VL);
3026       if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
3027         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3028       if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
3029         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3030       if (sef_cpuid7_ecx.bits.vaes != 0)
3031         vm_features.set_feature(CPU_AVX512_VAES);
3032       if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
3033         vm_features.set_feature(CPU_AVX512_VNNI);
3034       if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
3035         vm_features.set_feature(CPU_AVX512_BITALG);
3036       if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
3037         vm_features.set_feature(CPU_AVX512_VBMI);
3038       if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
3039         vm_features.set_feature(CPU_AVX512_VBMI2);
3040     }
3041     if (is_intel()) {
3042       if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
3043           std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
3044           std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
3045           xem_xcr0_eax.bits.opmask != 0 &&
3046           xem_xcr0_eax.bits.zmm512 != 0 &&
3047           xem_xcr0_eax.bits.zmm32 != 0) {
3048         vm_features.set_feature(CPU_AVX10_1);
3049         vm_features.set_feature(CPU_AVX512F);
3050         vm_features.set_feature(CPU_AVX512CD);
3051         vm_features.set_feature(CPU_AVX512DQ);
3052         vm_features.set_feature(CPU_AVX512PF);
3053         vm_features.set_feature(CPU_AVX512ER);
3054         vm_features.set_feature(CPU_AVX512BW);
3055         vm_features.set_feature(CPU_AVX512VL);
3056         vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3057         vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3058         vm_features.set_feature(CPU_AVX512_VAES);
3059         vm_features.set_feature(CPU_AVX512_VNNI);
3060         vm_features.set_feature(CPU_AVX512_BITALG);
3061         vm_features.set_feature(CPU_AVX512_VBMI);
3062         vm_features.set_feature(CPU_AVX512_VBMI2);
3063         if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3064           vm_features.set_feature(CPU_AVX10_2);
3065         }
3066       }
3067     }
3068   }
3069 
3070   if (std_cpuid1_ecx.bits.hv != 0)
3071     vm_features.set_feature(CPU_HV);
3072   if (sef_cpuid7_ebx.bits.bmi1 != 0)
3073     vm_features.set_feature(CPU_BMI1);
3074   if (std_cpuid1_edx.bits.tsc != 0)
3075     vm_features.set_feature(CPU_TSC);
3076   if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3077     vm_features.set_feature(CPU_TSCINV_BIT);
3078   if (std_cpuid1_ecx.bits.aes != 0)
3079     vm_features.set_feature(CPU_AES);
3080   if (ext_cpuid1_ecx.bits.lzcnt != 0)
3081     vm_features.set_feature(CPU_LZCNT);
3082   if (ext_cpuid1_ecx.bits.prefetchw != 0)
3083     vm_features.set_feature(CPU_3DNOW_PREFETCH);
3084   if (sef_cpuid7_ebx.bits.erms != 0)
3085     vm_features.set_feature(CPU_ERMS);
3086   if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3087     vm_features.set_feature(CPU_FSRM);
3088   if (std_cpuid1_ecx.bits.clmul != 0)
3089     vm_features.set_feature(CPU_CLMUL);
3090   if (sef_cpuid7_ebx.bits.rtm != 0)
3091     vm_features.set_feature(CPU_RTM);
3092   if (sef_cpuid7_ebx.bits.adx != 0)
3093      vm_features.set_feature(CPU_ADX);
3094   if (sef_cpuid7_ebx.bits.bmi2 != 0)
3095     vm_features.set_feature(CPU_BMI2);
3096   if (sef_cpuid7_ebx.bits.sha != 0)
3097     vm_features.set_feature(CPU_SHA);
3098   if (std_cpuid1_ecx.bits.fma != 0)
3099     vm_features.set_feature(CPU_FMA);
3100   if (sef_cpuid7_ebx.bits.clflushopt != 0)
3101     vm_features.set_feature(CPU_FLUSHOPT);
3102   if (sef_cpuid7_ebx.bits.clwb != 0)
3103     vm_features.set_feature(CPU_CLWB);
3104   if (ext_cpuid1_edx.bits.rdtscp != 0)
3105     vm_features.set_feature(CPU_RDTSCP);
3106   if (sef_cpuid7_ecx.bits.rdpid != 0)
3107     vm_features.set_feature(CPU_RDPID);
3108 
3109   // AMD|Hygon additional features.
3110   if (is_amd_family()) {
3111     // PREFETCHW was checked above, check TDNOW here.
3112     if ((ext_cpuid1_edx.bits.tdnow != 0))
3113       vm_features.set_feature(CPU_3DNOW_PREFETCH);
3114     if (ext_cpuid1_ecx.bits.sse4a != 0)
3115       vm_features.set_feature(CPU_SSE4A);
3116   }
3117 
3118   // Intel additional features.
3119   if (is_intel()) {
3120     if (sef_cpuid7_edx.bits.serialize != 0)
3121       vm_features.set_feature(CPU_SERIALIZE);
3122     if (sef_cpuid7_edx.bits.hybrid != 0)
3123       vm_features.set_feature(CPU_HYBRID);
3124     if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3125       vm_features.set_feature(CPU_AVX512_FP16);
3126   }
3127 
3128   // ZX additional features.
3129   if (is_zx()) {
3130     // We do not know if these are supported by ZX, so we cannot trust
3131     // common CPUID bit for them.
3132     assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3133     vm_features.clear_feature(CPU_CLWB);
3134   }
3135 
3136   // Protection key features.
3137   if (sef_cpuid7_ecx.bits.pku != 0) {
3138     vm_features.set_feature(CPU_PKU);
3139   }
3140   if (sef_cpuid7_ecx.bits.ospke != 0) {
3141     vm_features.set_feature(CPU_OSPKE);
3142   }
3143 
3144   // Control flow enforcement (CET) features.
3145   if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3146     vm_features.set_feature(CPU_CET_SS);
3147   }
3148   if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3149     vm_features.set_feature(CPU_CET_IBT);
3150   }
3151 
3152   // Composite features.
3153   if (supports_tscinv_bit() &&
3154       ((is_amd_family() && !is_amd_Barcelona()) ||
3155        is_intel_tsc_synched_at_init())) {
3156     vm_features.set_feature(CPU_TSCINV);
3157   }
3158   return vm_features;
3159 }
3160 
3161 bool VM_Version::os_supports_avx_vectors() {
3162   bool retVal = false;
3163   int nreg = 4;
3164   if (supports_evex()) {
3165     // Verify that OS save/restore all bits of EVEX registers
3166     // during signal processing.
3167     retVal = true;
3168     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3169       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3170         retVal = false;
3171         break;
3172       }
3173     }
3174   } else if (supports_avx()) {
3175     // Verify that OS save/restore all bits of AVX registers
3176     // during signal processing.
3177     retVal = true;
3178     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3179       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3180         retVal = false;
3181         break;
3182       }
3183     }
3184     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3185     if (retVal == false) {
3186       // Verify that OS save/restore all bits of EVEX registers
3187       // during signal processing.
3188       retVal = true;
3189       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3190         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3191           retVal = false;
3192           break;
3193         }
3194       }
3195     }
3196   }
3197   return retVal;
3198 }
3199 
3200 bool VM_Version::os_supports_apx_egprs() {
3201   if (!supports_apx_f()) {
3202     return false;
3203   }
3204   if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3205       _cpuid_info.apx_save[1] != egpr_test_value()) {
3206     return false;
3207   }
3208   return true;
3209 }
3210 
3211 uint VM_Version::cores_per_cpu() {
3212   uint result = 1;
3213   if (is_intel()) {
3214     bool supports_topology = supports_processor_topology();
3215     if (supports_topology) {
3216       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3217                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3218     }
3219     if (!supports_topology || result == 0) {
3220       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3221     }
3222   } else if (is_amd_family()) {
3223     result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3224     if (cpu_family() >= 0x17) { // Zen or later
3225       result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3226     }
3227   } else if (is_zx()) {
3228     bool supports_topology = supports_processor_topology();
3229     if (supports_topology) {
3230       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3231                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3232     }
3233     if (!supports_topology || result == 0) {
3234       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3235     }
3236   }
3237   return result;
3238 }
3239 
3240 uint VM_Version::threads_per_core() {
3241   uint result = 1;
3242   if (is_intel() && supports_processor_topology()) {
3243     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3244   } else if (is_zx() && supports_processor_topology()) {
3245     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3246   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3247     if (cpu_family() >= 0x17) {
3248       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3249     } else {
3250       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3251                  cores_per_cpu();
3252     }
3253   }
3254   return (result == 0 ? 1 : result);
3255 }
3256 
3257 uint VM_Version::L1_line_size() {
3258   uint result = 0;
3259   if (is_intel()) {
3260     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3261   } else if (is_amd_family()) {
3262     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3263   } else if (is_zx()) {
3264     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3265   }
3266   if (result < 32) // not defined ?
3267     result = 32;   // 32 bytes by default on x86 and other x64
3268   return result;
3269 }
3270 
3271 bool VM_Version::is_intel_tsc_synched_at_init() {
3272   if (is_intel_family_core()) {
3273     uint32_t ext_model = extended_cpu_model();
3274     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3275         ext_model == CPU_MODEL_WESTMERE_EP    ||
3276         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3277         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3278       // <= 2-socket invariant tsc support. EX versions are usually used
3279       // in > 2-socket systems and likely don't synchronize tscs at
3280       // initialization.
3281       // Code that uses tsc values must be prepared for them to arbitrarily
3282       // jump forward or backward.
3283       return true;
3284     }
3285   }
3286   return false;
3287 }
3288 
3289 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3290   // Hardware prefetching (distance/size in bytes):
3291   // Pentium 3 -  64 /  32
3292   // Pentium 4 - 256 / 128
3293   // Athlon    -  64 /  32 ????
3294   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3295   // Core      - 128 /  64
3296   //
3297   // Software prefetching (distance in bytes / instruction with best score):
3298   // Pentium 3 - 128 / prefetchnta
3299   // Pentium 4 - 512 / prefetchnta
3300   // Athlon    - 128 / prefetchnta
3301   // Opteron   - 256 / prefetchnta
3302   // Core      - 256 / prefetchnta
3303   // It will be used only when AllocatePrefetchStyle > 0
3304 
3305   if (is_amd_family()) { // AMD | Hygon
3306     if (supports_sse2()) {
3307       return 256; // Opteron
3308     } else {
3309       return 128; // Athlon
3310     }
3311   } else if (is_zx()) {
3312     if (supports_sse2()) {
3313       return 256;
3314     } else {
3315       return 128;
3316     }
3317   } else { // Intel
3318     if (supports_sse3() && is_intel_server_family()) {
3319       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3320         return 192;
3321       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3322         return 384;
3323       }
3324     }
3325     if (supports_sse2()) {
3326       if (is_intel_server_family()) {
3327         return 256; // Pentium M, Core, Core2
3328       } else {
3329         return 512; // Pentium 4
3330       }
3331     } else {
3332       return 128; // Pentium 3 (and all other old CPUs)
3333     }
3334   }
3335 }
3336 
3337 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3338   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3339   switch (id) {
3340   case vmIntrinsics::_floatToFloat16:
3341   case vmIntrinsics::_float16ToFloat:
3342     if (!supports_float16()) {
3343       return false;
3344     }
3345     break;
3346   default:
3347     break;
3348   }
3349   return true;
3350 }
3351 
3352 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3353   int i = 0;
3354   ss.join([&]() {
3355     const char* str = nullptr;
3356     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3357       if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3358         str = _features_names[i];
3359       }
3360       i += 1;
3361     }
3362     return str;
3363   }, ", ");
3364 }
3365 
3366 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3367   VM_Features* features = (VM_Features*)features_buffer;
3368   insert_features_names(*features, ss);
3369 }
3370 
3371 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3372   VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3373   VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3374   int i = 0;
3375   ss.join([&]() {
3376     const char* str = nullptr;
3377     while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3378       Feature_Flag flag = (Feature_Flag)i;
3379       if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3380         str = _features_names[i];
3381       }
3382       i += 1;
3383     }
3384     return str;
3385   }, ", ");
3386 }
3387 
3388 int VM_Version::cpu_features_size() {
3389   return sizeof(VM_Features);
3390 }
3391 
3392 void VM_Version::store_cpu_features(void* buf) {
3393   VM_Features copy = _features;
3394   copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3395   memcpy(buf, &copy, sizeof(VM_Features));
3396 }
3397 
3398 bool VM_Version::supports_features(void* features_buffer) {
3399   VM_Features* features_to_test = (VM_Features*)features_buffer;
3400   return _features.supports_features(features_to_test);
3401 }