/*
 * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "asm/macroAssembler.hpp"
#include "asm/macroAssembler.inline.hpp"
#include "classfile/vmIntrinsics.hpp"
#include "code/codeBlob.hpp"
#include "compiler/compilerDefinitions.inline.hpp"
#include "jvm.h"
#include "logging/log.hpp"
#include "logging/logStream.hpp"
#include "memory/resourceArea.hpp"
#include "memory/universe.hpp"
#include "runtime/globals_extension.hpp"
#include "runtime/java.hpp"
#include "runtime/os.inline.hpp"
#include "runtime/stubCodeGenerator.hpp"
#include "runtime/vm_version.hpp"
#include "utilities/checkedCast.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/virtualizationSupport.hpp"

int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

static BufferBlob* stub_blob;
static const int stub_size = 2000;

extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
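
// The typedefs above describe the generated stubs. A minimal usage sketch
// (the get_cpu_info call mirrors the real one in get_processor_features()
// below; the 0x40000000 hypervisor leaf is an illustrative assumption):
//   get_cpu_info_stub(&_cpuid_info);      // fill in raw cpuid data
//   uint32_t regs[4];
//   detect_virt_stub(0x40000000, regs);   // query one leaf into regs[]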

#ifdef _LP64

bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64; if not, we are in real
  // trouble because we rely on it to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part of the
  // generation of the code cache flush routine. This happens under
  // Universe::init before the processor features are set up.
  // Assembler::flush calls this routine to check that clflush is allowed,
  // so we give the caller a free pass if Universe init is still in progress.
  assert(!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0, "clflush should be available");
  return true;
}
#endif

#define CPUID_STANDARD_FN   0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN   0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
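
// CPUID calling convention: the leaf number goes in EAX (and, for leaves
// with sub-leaves such as 0x4, 0x7 and 0xB, the sub-leaf index in ECX);
// results come back in EAX, EBX, ECX and EDX.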

class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

#if defined(_LP64)
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call-clobbered registers. Explicitly clearing r16 and r31
    // during signal handling guarantees that values observed in them after
    // the signal were re-instantiated by the operating system, rather than
    // simply never having been modified.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }
#endif

  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
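    // Detection strategy for pre-CPUID chips: EFLAGS.AC (bit 18, 0x40000)
    // cannot be toggled on a 386, and EFLAGS.ID (bit 21, 0x200000) cannot
    // be toggled on a 486 that lacks the cpuid instruction. Each bit is
    // flipped via pushf/popf below to see whether the change sticks.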
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
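
    // Leaf 0 layout just stored: eax = highest supported standard leaf,
    // ebx:edx:ecx = the 12-byte vendor string (e.g. "GenuineIntel").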

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);
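
    // For leaf 0xB, ecx selects the topology level (probed here: 0 =
    // threads, 1 = cores, 2 = packages); eax[4:0] is the APIC-id shift
    // and ebx[15:0] the logical-processor count at that level. Both
    // fields read as zero for an invalid level, which is what the
    // validity checks below test for.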

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[15:0] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
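
    // xgetbv with ecx = 0 selects XCR0 and returns it in edx:eax;
    // bit 1 = SSE (XMM) state, bit 2 = AVX (YMM) state. Both halves are
    // saved, though only the low word is consumed by the checks below.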

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);
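
    // Only eax and edx of sub-leaf 1 are kept: eax carries flags such as
    // AVX-IFMA, while edx bit 21 is APX_F, consumed by the APX probe below.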

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
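
    // The comparisons above dispatch on the highest supported extended
    // leaf (returned in eax by leaf 0x80000000), and the blocks below are
    // laid out in descending leaf order, so execution falls through from
    // the highest supported leaf down to 0x80000001, querying only leaves
    // the CPU actually implements.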
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

#if defined(_LP64)
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCR0[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ cmpl(rax, 0x200000);
    __ jcc(Assembler::notEqual, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ cmpl(rax, 0x80000);
    __ jcc(Assembler::notEqual, vector_save_restore);

#ifndef PRODUCT
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
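    // Execution resumes here after the signal handler skips the faulting
    // load above. r16/r31 are stored so their post-signal contents can be
    // compared against egpr_test_value() later, proving the OS saved and
    // restored the extended GPR state across the signal.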
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
#endif
#endif
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if OS supports both SSE and YMM state

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;
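
    // Note: UseAVX/UseSSE gate instruction emission via asserts in the
    // MacroAssembler, so they (and the faked CPU feature bits) are forced
    // up temporarily while the EVEX/AVX test sequences are emitted, and
    // restored afterwards.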

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4)); // sef_cpuid7 ebx bit 16: avx512f
      __ cmpl(rax, 0x10000);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _LP64
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
#endif
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
#ifdef _LP64
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _LP64
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
#ifdef _LP64
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
#endif
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ cmpl(rax, 0x10000);
      __ jcc(Assembler::notEqual, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits opmask | zmm512 | zmm32
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
#ifdef _LP64
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
#endif

#ifdef _WINDOWS
#ifdef _LP64
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _LP64
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
#ifdef _LP64
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);
#endif

#ifdef _WINDOWS
#ifdef _LP64
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _LP64
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
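    // 0x0FFF0FF0 keeps the family/model fields (including the extended
    // ones) of the cpuid signature while masking out the stepping, so the
    // comparisons below match any stepping of the listed parts.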
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

#ifdef _LP64
    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
#else
    __ movptr(rax, Address(rsp, 16)); // CPUID leaf
    __ movptr(rsi, Address(rsp, 20)); // register array address
#endif
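
    // rax now holds the requested leaf (presumably hypervisor leaves in
    // the 0x40000000 range, given this stub's purpose) and rsi the output
    // array; a plain cpuid fills in the four registers stored below.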

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC           = 0x40000;
    const uint32_t HS_EFL_ID           = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int      CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // LP64: rcx and rdx are first and second argument registers on windows

    __ push(rbp);
#ifdef _LP64
    __ mov(rbp, c_rarg0); // cpuid_info address
#else
    __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
#endif
    __ push(rbx);
    __ push(rsi);
    __ pushf();          // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);
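
    // Leaves 0x80000002..0x80000004 each return 16 bytes of the 48-byte,
    // NUL-terminated processor brand string in eax:ebx:ecx:edx.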

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};

void VM_Version::get_processor_features() {

  _cpu = 4; // 486 by default
  _model = 0;
  _stepping = 0;
  _features = 0;
  _logical_processors_per_package = 1;
  // i486 internal cache is both I&D and has a 16-byte line size
  _L1_data_cache_line_size = 16;

  // Get raw processor info

  get_cpu_info_stub(&_cpuid_info);

  assert_is_initialized();
  _cpu = extended_cpu_family();
  _model = extended_cpu_model();
  _stepping = cpu_stepping();

  if (cpu_family() > 4) { // it supports CPUID
    _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
    _cpu_features = _features;   // Preserve features
    // Logical processors are only available on P4s and above,
    // and only if hyperthreading is available.
    _logical_processors_per_package = logical_processor_count();
    _L1_data_cache_line_size = L1_line_size();
  }

  // xchg and xadd instructions
  _supports_atomic_getset4 = true;
  _supports_atomic_getadd4 = true;
  LP64_ONLY(_supports_atomic_getset8 = true);
  LP64_ONLY(_supports_atomic_getadd8 = true);

#ifdef _LP64
  // OS should support SSE for x64 and hardware should support at least SSE2.
  if (!VM_Version::supports_sse2()) {
    vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
  }
  // in 64 bit the use of SSE2 is the minimum
  if (UseSSE < 2) UseSSE = 2;
#endif

#ifdef AMD64
  // flush_icache_stub has to be generated first.
  // That is why the ICache line size is hard-coded in the ICache class,
  // see icache_x86.hpp. It is also the reason why we can't use the
  // clflush instruction in the 32-bit VM, since it could be running
  // on a CPU which does not support it.
  //
  // The only thing we can do is to verify that the flushed
  // ICache::line_size has the correct value.
  guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
  // clflush_size is size in quadwords (8 bytes).
  guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
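  // I.e. we require the 64-byte (8 * 8 bytes) cache line size that the
  // hard-coded ICache::line_size assumes.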
#endif

#ifdef _LP64
  // Assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstants.DATA_CACHE_LINE_FLUSH_SIZE to non-zero.
  // It is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
#endif

  // Check if processor has Intel Ecore
  if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && cpu_family() == 6 &&
    (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF)) {
    FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
  }

  if (UseSSE < 4) {
    _features &= ~CPU_SSE4_1;
    _features &= ~CPU_SSE4_2;
  }

  if (UseSSE < 3) {
    _features &= ~CPU_SSE3;
    _features &= ~CPU_SSSE3;
    _features &= ~CPU_SSE4A;
  }

  if (UseSSE < 2)
    _features &= ~CPU_SSE2;

  if (UseSSE < 1)
    _features &= ~CPU_SSE;

  // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
  if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
    UseAVX = 0;
  }

  // UseSSE is set to the smaller of what hardware supports and what
  // the command line requires.  I.e., you cannot set UseSSE to 2 on
  // older Pentiums which do not support it.
  int use_sse_limit = 0;
  if (UseSSE > 0) {
    if (UseSSE > 3 && supports_sse4_1()) {
      use_sse_limit = 4;
    } else if (UseSSE > 2 && supports_sse3()) {
      use_sse_limit = 3;
    } else if (UseSSE > 1 && supports_sse2()) {
      use_sse_limit = 2;
    } else if (UseSSE > 0 && supports_sse()) {
      use_sse_limit = 1;
    } else {
      use_sse_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseSSE)) {
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  } else if (UseSSE > use_sse_limit) {
    warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
    FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
  }
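  // E.g. -XX:UseSSE=4 on a CPU without SSE4.1 is clamped to the highest
  // level the hardware does support, with the warning above.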

  // first try initial setting and detect what we can support
  int use_avx_limit = 0;
  if (UseAVX > 0) {
    if (UseSSE < 4) {
      // Don't use AVX if SSE is unavailable or has been disabled.
      use_avx_limit = 0;
    } else if (UseAVX > 2 && supports_evex()) {
      use_avx_limit = 3;
    } else if (UseAVX > 1 && supports_avx2()) {
      use_avx_limit = 2;
    } else if (UseAVX > 0 && supports_avx()) {
      use_avx_limit = 1;
    } else {
      use_avx_limit = 0;
    }
  }
  if (FLAG_IS_DEFAULT(UseAVX)) {
    // Don't use AVX-512 on older Skylakes unless explicitly requested.
    if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
      FLAG_SET_DEFAULT(UseAVX, 2);
    } else {
      FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
    }
  }
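  // (Early Skylake-Server steppings, below 5, are reputed to pay a heavy
  // frequency penalty for 512-bit instructions, hence the AVX2 default.)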

  if (UseAVX > use_avx_limit) {
    if (UseSSE < 4) {
      warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
    } else {
      warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
    }
    FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
  }

  if (UseAVX < 3) {
    _features &= ~CPU_AVX512F;
    _features &= ~CPU_AVX512DQ;
    _features &= ~CPU_AVX512CD;
    _features &= ~CPU_AVX512BW;
    _features &= ~CPU_AVX512VL;
    _features &= ~CPU_AVX512_VPOPCNTDQ;
    _features &= ~CPU_AVX512_VPCLMULQDQ;
    _features &= ~CPU_AVX512_VAES;
    _features &= ~CPU_AVX512_VNNI;
    _features &= ~CPU_AVX512_VBMI;
    _features &= ~CPU_AVX512_VBMI2;
    _features &= ~CPU_AVX512_BITALG;
    _features &= ~CPU_AVX512_IFMA;
    _features &= ~CPU_APX_F;
  }

  // Currently APX support is only enabled for targets supporting AVX512VL feature.
  bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
  if (UseAPX && !apx_supported) {
    warning("UseAPX is not supported on this CPU, setting it to false");
    FLAG_SET_DEFAULT(UseAPX, false);
  } else if (FLAG_IS_DEFAULT(UseAPX)) {
    FLAG_SET_DEFAULT(UseAPX, apx_supported);
  }

  if (!UseAPX) {
    _features &= ~CPU_APX_F;
  }

  if (UseAVX < 2) {
    _features &= ~CPU_AVX2;
    _features &= ~CPU_AVX_IFMA;
  }

  if (UseAVX < 1) {
    _features &= ~CPU_AVX;
    _features &= ~CPU_VZEROUPPER;
    _features &= ~CPU_F16C;
  }

  if (logical_processors_per_package() == 1) {
    // HT processor could be installed on a system which doesn't support HT.
    _features &= ~CPU_HT;
  }

  if (is_intel()) { // Intel cpus specific settings
    if (is_knights_family()) {
      _features &= ~CPU_VZEROUPPER;
      _features &= ~CPU_AVX512BW;
      _features &= ~CPU_AVX512VL;
      _features &= ~CPU_AVX512DQ;
      _features &= ~CPU_AVX512_VNNI;
      _features &= ~CPU_AVX512_VAES;
      _features &= ~CPU_AVX512_VPOPCNTDQ;
      _features &= ~CPU_AVX512_VPCLMULQDQ;
      _features &= ~CPU_AVX512_VBMI;
      _features &= ~CPU_AVX512_VBMI2;
      _features &= ~CPU_CLWB;
      _features &= ~CPU_FLUSHOPT;
      _features &= ~CPU_GFNI;
      _features &= ~CPU_AVX512_BITALG;
      _features &= ~CPU_AVX512_IFMA;
      _features &= ~CPU_AVX_IFMA;
    }
  }

  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
  } else {
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }
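  // The Intel JCC erratum: on affected parts, jump instructions that
  // cross or end on a 32-byte boundary can misbehave, and the microcode
  // fix disables caching of such lines in the decoded icache. When this
  // flag is set, the JIT mitigates the resulting performance loss by
  // padding branches away from those boundaries.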

  char buf[1024];
  int res = jio_snprintf(
              buf, sizeof(buf),
              "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
              cores_per_cpu(), threads_per_core(),
              cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  assert(res > 0, "not enough temporary space allocated");
  insert_features_names(buf + res, sizeof(buf) - res, _features_names);

  _features_string = os::strdup(buf);

  // Use AES instructions if available.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require SSE3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require SSE4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
      FLAG_SET_DEFAULT(UseAES, false);
    }
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    }
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    }
  }

  // Use CLMUL instructions if available.
  if (supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCLMUL)) {
      UseCLMUL = true;
    }
  } else if (UseCLMUL) {
    if (!FLAG_IS_DEFAULT(UseCLMUL))
      warning("CLMUL instructions not available on this CPU (AVX may also be required)");
    FLAG_SET_DEFAULT(UseCLMUL, false);
  }

  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
      UseCRC32Intrinsics = true;
    }
  } else if (UseCRC32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
      warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
    FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
  }

#ifdef _LP64
  if (supports_avx2()) {
    if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      UseAdler32Intrinsics = true;
    }
  } else if (UseAdler32Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
      warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
    }
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#else
  if (UseAdler32Intrinsics) {
    warning("Adler32Intrinsics not available on this CPU.");
    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
  }
#endif

  if (supports_sse4_2() && supports_clmul()) {
    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      UseCRC32CIntrinsics = true;
    }
  } else if (UseCRC32CIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
      warning("CRC32C intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
  }

  // GHASH/GCM intrinsics
  if (UseCLMUL && (UseSSE > 2)) {
    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
      UseGHASHIntrinsics = true;
    }
  } else if (UseGHASHIntrinsics) {
    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
  }

#ifdef _LP64
  // ChaCha20 Intrinsics
  // As long as the system supports AVX as a baseline we can do a
  // SIMD-enabled block function.  StubGenerator makes the determination
  // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
  // version.
  if (UseAVX >= 1) {
    if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      UseChaCha20Intrinsics = true;
    }
  } else if (UseChaCha20Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
      warning("ChaCha20 intrinsic requires AVX instructions");
    }
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#else
  // No support currently for ChaCha20 intrinsics on 32-bit platforms
  if (UseChaCha20Intrinsics) {
    warning("ChaCha20 intrinsics are not available on this CPU.");
    FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
  }
#endif // _LP64

  // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
  if (UseAVX >= 2) {
    if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
      UseBASE64Intrinsics = true;
    }
  } else if (UseBASE64Intrinsics) {
    if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
      warning("Base64 intrinsics require AVX2 instructions on this CPU");
    FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
  }

  if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
    if (FLAG_IS_DEFAULT(UseFMA)) {
      UseFMA = true;
    }
  } else if (UseFMA) {
    warning("FMA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseFMA, false);
  }

  if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
    UseMD5Intrinsics = true;
  }

  if (supports_sha() LP64_ONLY(|| (supports_avx2() && supports_bmi2()))) {
    if (FLAG_IS_DEFAULT(UseSHA)) {
      UseSHA = true;
    }
  } else if (UseSHA) {
    warning("SHA instructions are not available on this CPU");
    FLAG_SET_DEFAULT(UseSHA, false);
  }

  if (supports_sha() && supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
    }
  } else if (UseSHA1Intrinsics) {
    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
  }

  if (supports_sse4_1() && UseSHA) {
    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
    }
  } else if (UseSHA256Intrinsics) {
    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
  }

#ifdef _LP64
  // These are only supported on 64-bit
  if (UseSHA && supports_avx2() && supports_bmi2()) {
    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
    }
  } else
#endif
  if (UseSHA512Intrinsics) {
    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
  }

  if (UseSHA3Intrinsics) {
    warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
    FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
  }

  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
    FLAG_SET_DEFAULT(UseSHA, false);
  }

#ifdef COMPILER2
  if (UseFPUForSpilling) {
    if (UseSSE < 2) {
      // Only supported with SSE2+
      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
    }
  }
#endif

#if COMPILER2_OR_JVMCI
  int max_vector_size = 0;
  if (UseSSE < 2) {
    // Vectors (in XMM) are only supported with SSE2+
    // SSE is always 2 on x64.
    max_vector_size = 0;
  } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
    // 16 byte vectors (in XMM) are supported with SSE2+
    max_vector_size = 16;
  } else if (UseAVX == 1 || UseAVX == 2) {
    // 32 bytes vectors (in YMM) are only supported with AVX+
    max_vector_size = 32;
  } else if (UseAVX > 2) {
    // 64 bytes vectors (in ZMM) are only supported with AVX 3
    max_vector_size = 64;
  }
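  // Resulting defaults: 16-byte XMM vectors with plain SSE2, 32-byte YMM
  // vectors with AVX/AVX2, and 64-byte ZMM vectors with AVX-512.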
1352 
1353 #ifdef _LP64
1354   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1355 #else
1356   int min_vector_size = 0;
1357 #endif
1358 
1359   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1360     if (MaxVectorSize < min_vector_size) {
1361       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1362       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1363     }
1364     if (MaxVectorSize > max_vector_size) {
1365       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1366       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1367     }
1368     if (!is_power_of_2(MaxVectorSize)) {
1369       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1370       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1371     }
1372   } else {
1373     // If default, use highest supported configuration
1374     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1375   }
1376 
1377 #if defined(COMPILER2) && defined(ASSERT)
1378   if (MaxVectorSize > 0) {
1379     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1380       tty->print_cr("State of YMM registers after signal handle:");
1381       int nreg = 2 LP64_ONLY(+2);
1382       const char* ymm_name[4] = {"0", "7", "8", "15"};
1383       for (int i = 0; i < nreg; i++) {
1384         tty->print("YMM%s:", ymm_name[i]);
1385         for (int j = 7; j >=0; j--) {
1386           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1387         }
1388         tty->cr();
1389       }
1390     }
1391   }
1392 #endif // COMPILER2 && ASSERT
1393 
1394 #ifdef _LP64
1395   if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma())  {
1396     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1397       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1398     }
1399   } else
1400 #endif
1401   if (UsePoly1305Intrinsics) {
1402     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1403     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1404   }
1405 
1406 #ifdef _LP64
1407   if (supports_avx512ifma() && supports_avx512vlbw()) {
1408     if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1409       FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1410     }
1411   } else
1412 #endif
1413   if (UseIntPolyIntrinsics) {
1414     warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1415     FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1416   }
1417 
1418 #ifdef _LP64
1419   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1420     UseMultiplyToLenIntrinsic = true;
1421   }
1422   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1423     UseSquareToLenIntrinsic = true;
1424   }
1425   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1426     UseMulAddIntrinsic = true;
1427   }
1428   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1429     UseMontgomeryMultiplyIntrinsic = true;
1430   }
1431   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1432     UseMontgomerySquareIntrinsic = true;
1433   }
1434 #else
1435   if (UseMultiplyToLenIntrinsic) {
1436     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1437       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1438     }
1439     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1440   }
1441   if (UseMontgomeryMultiplyIntrinsic) {
1442     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1443       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1444     }
1445     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1446   }
1447   if (UseMontgomerySquareIntrinsic) {
1448     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1449       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1450     }
1451     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1452   }
1453   if (UseSquareToLenIntrinsic) {
1454     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1455       warning("squareToLen intrinsic is not available in 32-bit VM");
1456     }
1457     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1458   }
1459   if (UseMulAddIntrinsic) {
1460     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1461       warning("mulAdd intrinsic is not available in 32-bit VM");
1462     }
1463     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1464   }
1465 #endif // _LP64
1466 #endif // COMPILER2_OR_JVMCI
1467 
1468   // On new cpus instructions which update whole XMM register should be used
1469   // to prevent partial register stall due to dependencies on high half.
1470   //
1471   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1472   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1473   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1474   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1475 
1476 
1477   if (is_zx()) { // ZX cpus specific settings
1478     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1479       UseStoreImmI16 = false; // don't use it on ZX cpus
1480     }
1481     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1482       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1483         // Use it on all ZX cpus
1484         UseAddressNop = true;
1485       }
1486     }
1487     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1488       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1489     }
1490     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1491       if (supports_sse3()) {
1492         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1493       } else {
1494         UseXmmRegToRegMoveAll = false;
1495       }
1496     }
1497     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1498 #ifdef COMPILER2
1499       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new ZX cpus, apply the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
        // in the current fetch line (OptoLoopAlignment), or if the padding
        // is big (> MaxLoopPad).
1505         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1506         // generated NOP instructions. 11 is the largest size of one
1507         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1508         MaxLoopPad = 11;
1509       }
1510 #endif // COMPILER2
1511       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1512         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1513       }
1514       if (supports_sse4_2()) { // new ZX cpus
1515         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1516           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1517         }
1518       }
1519       if (supports_sse4_2()) {
1520         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1521           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1522         }
1523       } else {
1524         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1525           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1526         }
1527         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1528       }
1529     }
1530 
1531     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1532       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1533     }
1534   }
1535 
1536   if (is_amd_family()) { // AMD cpus specific settings
1537     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1538       // Use it on new AMD cpus starting from Opteron.
1539       UseAddressNop = true;
1540     }
1541     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1542       // Use it on new AMD cpus starting from Opteron.
1543       UseNewLongLShift = true;
1544     }
1545     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1546       if (supports_sse4a()) {
1547         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1548       } else {
1549         UseXmmLoadAndClearUpper = false;
1550       }
1551     }
1552     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1553       if (supports_sse4a()) {
1554         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1555       } else {
1556         UseXmmRegToRegMoveAll = false;
1557       }
1558     }
1559     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1560       if (supports_sse4a()) {
1561         UseXmmI2F = true;
1562       } else {
1563         UseXmmI2F = false;
1564       }
1565     }
1566     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1567       if (supports_sse4a()) {
1568         UseXmmI2D = true;
1569       } else {
1570         UseXmmI2D = false;
1571       }
1572     }
1573     if (supports_sse4_2()) {
1574       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1575         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1576       }
1577     } else {
1578       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1579         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1580       }
1581       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1582     }
1583 
1584     // some defaults for AMD family 15h
1585     if (cpu_family() == 0x15) {
1586       // On family 15h processors default is no sw prefetch
1587       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1588         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1589       }
      // Also, if a prefetch style is specified, the default prefetch instruction is PREFETCHW
1591       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1592         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1593       }
1594       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1595       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1596         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1597       }
1598       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1599         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1600       }
1601     }
1602 
1603 #ifdef COMPILER2
1604     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1605       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1606       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1607     }
1608 #endif // COMPILER2
1609 
1610     // Some defaults for AMD family >= 17h && Hygon family 18h
1611     if (cpu_family() >= 0x17) {
1612       // On family >=17h processors use XMM and UnalignedLoadStores
1613       // for Array Copy
1614       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1615         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1616       }
1617       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1618         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1619       }
1620 #ifdef COMPILER2
1621       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1622         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1623       }
1624 #endif
1625     }
1626   }
1627 
1628   if (is_intel()) { // Intel cpus specific settings
1629     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1630       UseStoreImmI16 = false; // don't use it on Intel cpus
1631     }
1632     if (cpu_family() == 6 || cpu_family() == 15) {
1633       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1634         // Use it on all Intel cpus starting from PentiumPro
1635         UseAddressNop = true;
1636       }
1637     }
1638     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1639       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1640     }
1641     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1642       if (supports_sse3()) {
1643         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1644       } else {
1645         UseXmmRegToRegMoveAll = false;
1646       }
1647     }
1648     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1649 #ifdef COMPILER2
1650       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
        // For new Intel cpus, apply the following optimization:
        // don't align the beginning of a loop if there are enough instructions
        // left (NumberOfLoopInstrToAlign, defined in c2_globals.hpp)
        // in the current fetch line (OptoLoopAlignment), or if the padding
        // is big (> MaxLoopPad).
1656         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1657         // generated NOP instructions. 11 is the largest size of one
1658         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1659         MaxLoopPad = 11;
1660       }
1661 #endif // COMPILER2
1662 
1663       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1664         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1665       }
1666       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1667         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1668           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1669         }
1670       }
1671       if (supports_sse4_2()) {
1672         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1673           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1674         }
1675       } else {
1676         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1677           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1678         }
1679         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1680       }
1681     }
1682     if (is_atom_family() || is_knights_family()) {
1683 #ifdef COMPILER2
1684       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1685         OptoScheduling = true;
1686       }
1687 #endif
1688       if (supports_sse4_2()) { // Silvermont
1689         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1690           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1691         }
1692       }
1693       if (FLAG_IS_DEFAULT(UseIncDec)) {
1694         FLAG_SET_DEFAULT(UseIncDec, false);
1695       }
1696     }
1697     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1698       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1699     }
1700 #ifdef COMPILER2
1701     if (UseAVX > 2) {
1702       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1703           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1704            ArrayOperationPartialInlineSize != 0 &&
1705            ArrayOperationPartialInlineSize != 16 &&
1706            ArrayOperationPartialInlineSize != 32 &&
1707            ArrayOperationPartialInlineSize != 64)) {
1708         int inline_size = 0;
1709         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1710           inline_size = 64;
1711         } else if (MaxVectorSize >= 32) {
1712           inline_size = 32;
1713         } else if (MaxVectorSize >= 16) {
1714           inline_size = 16;
1715         }
        if (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1717           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1718         }
1719         ArrayOperationPartialInlineSize = inline_size;
1720       }
1721 
1722       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1723         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1724         if (ArrayOperationPartialInlineSize) {
1725           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1726         } else {
1727           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1728         }
1729       }
1730     }
1731 #endif
1732   }
1733 
1734 #ifdef COMPILER2
1735   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1736     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1737       OptimizeFill = false;
1738     }
1739   }
1740 #endif
1741 
1742 #ifdef _LP64
1743   if (UseSSE42Intrinsics) {
1744     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1745       UseVectorizedMismatchIntrinsic = true;
1746     }
1747   } else if (UseVectorizedMismatchIntrinsic) {
1748     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1749       warning("vectorizedMismatch intrinsics are not available on this CPU");
1750     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1751   }
1752   if (UseAVX >= 2) {
1753     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1754   } else if (UseVectorizedHashCodeIntrinsic) {
1755     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1756       warning("vectorizedHashCode intrinsics are not available on this CPU");
1757     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1758   }
1759 #else
1760   if (UseVectorizedMismatchIntrinsic) {
1761     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1762       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1763     }
1764     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1765   }
1766   if (UseVectorizedHashCodeIntrinsic) {
1767     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1768       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1769     }
1770     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1771   }
1772 #endif // _LP64
1773 
  // Use count leading zeros instruction if available.
1775   if (supports_lzcnt()) {
1776     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1777       UseCountLeadingZerosInstruction = true;
1778     }
  } else if (UseCountLeadingZerosInstruction) {
1780     warning("lzcnt instruction is not available on this CPU");
1781     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1782   }
1783 
1784   // Use count trailing zeros instruction if available
1785   if (supports_bmi1()) {
1786     // tzcnt does not require VEX prefix
1787     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1788       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1789         // Don't use tzcnt if BMI1 is switched off on command line.
1790         UseCountTrailingZerosInstruction = false;
1791       } else {
1792         UseCountTrailingZerosInstruction = true;
1793       }
1794     }
1795   } else if (UseCountTrailingZerosInstruction) {
1796     warning("tzcnt instruction is not available on this CPU");
1797     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1798   }
1799 
1800   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1801   // VEX prefix is generated only when AVX > 0.
1802   if (supports_bmi1() && supports_avx()) {
1803     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1804       UseBMI1Instructions = true;
1805     }
1806   } else if (UseBMI1Instructions) {
1807     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1808     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1809   }
1810 
1811   if (supports_bmi2() && supports_avx()) {
1812     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1813       UseBMI2Instructions = true;
1814     }
1815   } else if (UseBMI2Instructions) {
1816     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1817     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1818   }
1819 
1820   // Use population count instruction if available.
1821   if (supports_popcnt()) {
1822     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1823       UsePopCountInstruction = true;
1824     }
1825   } else if (UsePopCountInstruction) {
1826     warning("POPCNT instruction is not available on this CPU");
1827     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1828   }
1829 
1830   // Use fast-string operations if available.
1831   if (supports_erms()) {
1832     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1833       UseFastStosb = true;
1834     }
1835   } else if (UseFastStosb) {
1836     warning("fast-string operations are not available on this CPU");
1837     FLAG_SET_DEFAULT(UseFastStosb, false);
1838   }
1839 
  // For AMD processors, use XMM/YMM MOVDQU instructions
  // for object initialization by default
1842   if (is_amd() && cpu_family() >= 0x19) {
1843     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1844       UseFastStosb = false;
1845     }
1846   }
1847 
1848 #ifdef COMPILER2
1849   if (is_intel() && MaxVectorSize > 16) {
1850     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1851       UseFastStosb = false;
1852     }
1853   }
1854 #endif
1855 
1856   // Use XMM/YMM MOVDQU instruction for Object Initialization
1857   if (UseSSE >= 2 && UseUnalignedLoadStores) {
1858     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1859       UseXMMForObjInit = true;
1860     }
1861   } else if (UseXMMForObjInit) {
1862     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1863     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1864   }
1865 
1866 #ifdef COMPILER2
1867   if (FLAG_IS_DEFAULT(AlignVector)) {
1868     // Modern processors allow misaligned memory operations for vectors.
1869     AlignVector = !UseUnalignedLoadStores;
1870   }
1871 #endif // COMPILER2
1872 
1873   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1874     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1875       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1876     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1877       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1878     }
1879   }
1880 
1881   // Allocation prefetch settings
1882   int cache_line_size = checked_cast<int>(prefetch_data_size());
1883   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1884       (cache_line_size > AllocatePrefetchStepSize)) {
1885     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1886   }
1887 
1888   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1889     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1890     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1891       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1892     }
1893     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1894   }
1895 
1896   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1897     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1898     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1899   }
1900 
1901   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1902     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1903         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1904       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1905     }
1906 #ifdef COMPILER2
1907     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1908       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1909     }
1910 #endif
1911   }
1912 
1913   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1914 #ifdef COMPILER2
1915     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1916       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1917     }
1918 #endif
1919   }
1920 
1921 #ifdef _LP64
1922   // Prefetch settings
1923 
1924   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1925   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1926   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1927   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1928 
1929   // gc copy/scan is disabled if prefetchw isn't supported, because
1930   // Prefetch::write emits an inlined prefetchw on Linux.
1931   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
  // The prefetcht0 instruction that is used instead works on both amd64 and em64t.
1933 
1934   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1935     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1936   }
1937   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1938     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1939   }
1940 #endif
1941 
1942   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1943      (cache_line_size > ContendedPaddingWidth))
1944      ContendedPaddingWidth = cache_line_size;
1945 
1946   // This machine allows unaligned memory accesses
1947   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1948     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1949   }
1950 
1951 #ifndef PRODUCT
1952   if (log_is_enabled(Info, os, cpu)) {
1953     LogStream ls(Log(os, cpu)::info());
1954     outputStream* log = &ls;
1955     log->print_cr("Logical CPUs per core: %u",
1956                   logical_processors_per_package());
1957     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1958     log->print("UseSSE=%d", UseSSE);
1959     if (UseAVX > 0) {
1960       log->print("  UseAVX=%d", UseAVX);
1961     }
1962     if (UseAES) {
1963       log->print("  UseAES=1");
1964     }
1965 #ifdef COMPILER2
1966     if (MaxVectorSize > 0) {
1967       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1968     }
1969 #endif
1970     log->cr();
1971     log->print("Allocation");
1972     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1973       log->print_cr(": no prefetching");
1974     } else {
1975       log->print(" prefetching: ");
1976       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1977         log->print("PREFETCHW");
1978       } else if (UseSSE >= 1) {
1979         if (AllocatePrefetchInstr == 0) {
1980           log->print("PREFETCHNTA");
1981         } else if (AllocatePrefetchInstr == 1) {
1982           log->print("PREFETCHT0");
1983         } else if (AllocatePrefetchInstr == 2) {
1984           log->print("PREFETCHT2");
1985         } else if (AllocatePrefetchInstr == 3) {
1986           log->print("PREFETCHW");
1987         }
1988       }
1989       if (AllocatePrefetchLines > 1) {
1990         log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1991       } else {
1992         log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1993       }
1994     }
1995 
1996     if (PrefetchCopyIntervalInBytes > 0) {
1997       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1998     }
1999     if (PrefetchScanIntervalInBytes > 0) {
2000       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
2001     }
2002     if (ContendedPaddingWidth > 0) {
2003       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
2004     }
2005   }
2006 #endif // !PRODUCT
  if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
    FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
  }
  if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
    FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
  }
2013 }
2014 
2015 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2016   VirtualizationType vrt = VM_Version::get_detected_virtualization();
2017   if (vrt == XenHVM) {
2018     st->print_cr("Xen hardware-assisted virtualization detected");
2019   } else if (vrt == KVM) {
2020     st->print_cr("KVM virtualization detected");
2021   } else if (vrt == VMWare) {
2022     st->print_cr("VMWare virtualization detected");
2023     VirtualizationSupport::print_virtualization_info(st);
2024   } else if (vrt == HyperV) {
2025     st->print_cr("Hyper-V virtualization detected");
2026   } else if (vrt == HyperVRole) {
2027     st->print_cr("Hyper-V role detected");
2028   }
2029 }
2030 
2031 bool VM_Version::compute_has_intel_jcc_erratum() {
2032   if (!is_intel_family_core()) {
2033     // Only Intel CPUs are affected.
2034     return false;
2035   }
2036   // The following table of affected CPUs is based on the following document released by Intel:
2037   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
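  //
  // The erratum affects jump instructions (including macro-fused jumps) that
  // cross or end on a 32-byte boundary; the mitigating microcode update stops
  // caching such lines in the decoded icache, so the JIT pads affected
  // branches when this predicate returns true to avoid the slowdown.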
2038   switch (_model) {
2039   case 0x8E:
2040     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2041     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
2042     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
2043     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
2044     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
2045     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2046     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2047     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
2048     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2049     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
2050   case 0x4E:
2051     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
2052     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
2053     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
2054     return _stepping == 0x3;
2055   case 0x55:
2056     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2057     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2058     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2059     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2060     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2061     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2062     return _stepping == 0x4 || _stepping == 0x7;
2063   case 0x5E:
2064     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2065     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2066     return _stepping == 0x3;
2067   case 0x9E:
2068     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2069     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2070     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2071     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2072     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2073     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2074     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2075     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2076     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2077     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2078     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2079     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H (8+2)
2081     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2082     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2083   case 0xA5:
2084     // Not in Intel documentation.
2085     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2086     return true;
2087   case 0xA6:
2088     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2089     return _stepping == 0x0;
2090   case 0xAE:
2091     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2092     return _stepping == 0xA;
2093   default:
2094     // If we are running on another intel machine not recognized in the table, we are okay.
2095     return false;
2096   }
2097 }
2098 
2099 // On Xen, the cpuid instruction returns
2100 //  eax / registers[0]: Version of Xen
2101 //  ebx / registers[1]: chars 'XenV'
2102 //  ecx / registers[2]: chars 'MMXe'
2103 //  edx / registers[3]: chars 'nVMM'
2104 //
2105 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2106 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2107 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2108 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2109 //
// More information:
2111 // https://kb.vmware.com/s/article/1009458
2112 //
2113 void VM_Version::check_virtualizations() {
2114   uint32_t registers[4] = {0};
2115   char signature[13] = {0};
2116 
  // Xen cpuid leaves can be found at 0x100-aligned boundaries starting
  // from 0x40000000 up to 0x40010000.
2119   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2120   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2121     detect_virt_stub(leaf, registers);
2122     memcpy(signature, &registers[1], 12);
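    // registers[1..3] (ebx, ecx, edx) hold the 12-character hypervisor
    // signature; on KVM, for example, this yields "KVMKVMKVM" plus padding.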
2123 
2124     if (strncmp("VMwareVMware", signature, 12) == 0) {
2125       Abstract_VM_Version::_detected_virtualization = VMWare;
2126       // check for extended metrics from guestlib
2127       VirtualizationSupport::initialize();
2128     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2129       Abstract_VM_Version::_detected_virtualization = HyperV;
2130 #ifdef _WINDOWS
2131       // CPUID leaf 0x40000007 is available to the root partition only.
2132       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2133       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2134       detect_virt_stub(0x40000007, registers);
2135       if ((registers[0] != 0x0) ||
2136           (registers[1] != 0x0) ||
2137           (registers[2] != 0x0) ||
2138           (registers[3] != 0x0)) {
2139         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2140       }
2141 #endif
2142     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2143       Abstract_VM_Version::_detected_virtualization = KVM;
2144     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2145       Abstract_VM_Version::_detected_virtualization = XenHVM;
2146     }
2147   }
2148 }
2149 
2150 #ifdef COMPILER2
2151 // Determine if it's running on Cascade Lake using default options.
2152 bool VM_Version::is_default_intel_cascade_lake() {
2153   return FLAG_IS_DEFAULT(UseAVX) &&
2154          FLAG_IS_DEFAULT(MaxVectorSize) &&
2155          UseAVX > 2 &&
2156          is_intel_cascade_lake();
2157 }
2158 #endif
2159 
2160 bool VM_Version::is_intel_cascade_lake() {
2161   return is_intel_skylake() && _stepping >= 5;
2162 }
2163 
2164 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2165 // for implementing the array copy and clear operations.
// Intel platforms that support the serialize instruction
// have an improved implementation of 64-byte load/stores and so the default
// threshold is set to 0 for these platforms.
2169 int VM_Version::avx3_threshold() {
2170   return (is_intel_family_core() &&
2171           supports_serialize() &&
2172           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2173 }
2174 
2175 #if defined(_LP64)
2176 void VM_Version::clear_apx_test_state() {
2177   clear_apx_test_state_stub();
2178 }
2179 #endif
2180 
2181 static bool _vm_version_initialized = false;
2182 
2183 void VM_Version::initialize() {
2184   ResourceMark rm;
  // Making this stub must be the FIRST use of the assembler.
2186   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2187   if (stub_blob == nullptr) {
2188     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2189   }
2190   CodeBuffer c(stub_blob);
2191   VM_Version_StubGenerator g(&c);
2192 
2193   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2194                                      g.generate_get_cpu_info());
2195   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2196                                      g.generate_detect_virt());
2197 
2198 #if defined(_LP64)
2199   clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2200                                      g.clear_apx_test_state());
2201 #endif
2202   get_processor_features();
2203 
2204   LP64_ONLY(Assembler::precompute_instructions();)
2205 
2206   if (VM_Version::supports_hv()) { // Supports hypervisor
2207     check_virtualizations();
2208   }
2209   _vm_version_initialized = true;
2210 }
2211 
2212 typedef enum {
2213    CPU_FAMILY_8086_8088  = 0,
2214    CPU_FAMILY_INTEL_286  = 2,
2215    CPU_FAMILY_INTEL_386  = 3,
2216    CPU_FAMILY_INTEL_486  = 4,
2217    CPU_FAMILY_PENTIUM    = 5,
2218    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2219    CPU_FAMILY_PENTIUM_4  = 0xF
2220 } FamilyFlag;
2221 
2222 typedef enum {
2223   RDTSCP_FLAG  = 0x08000000, // bit 27
2224   INTEL64_FLAG = 0x20000000  // bit 29
2225 } _featureExtendedEdxFlag;
2226 
2227 typedef enum {
2228    FPU_FLAG     = 0x00000001,
2229    VME_FLAG     = 0x00000002,
2230    DE_FLAG      = 0x00000004,
2231    PSE_FLAG     = 0x00000008,
2232    TSC_FLAG     = 0x00000010,
2233    MSR_FLAG     = 0x00000020,
2234    PAE_FLAG     = 0x00000040,
2235    MCE_FLAG     = 0x00000080,
2236    CX8_FLAG     = 0x00000100,
2237    APIC_FLAG    = 0x00000200,
2238    SEP_FLAG     = 0x00000800,
2239    MTRR_FLAG    = 0x00001000,
2240    PGE_FLAG     = 0x00002000,
2241    MCA_FLAG     = 0x00004000,
2242    CMOV_FLAG    = 0x00008000,
2243    PAT_FLAG     = 0x00010000,
2244    PSE36_FLAG   = 0x00020000,
2245    PSNUM_FLAG   = 0x00040000,
2246    CLFLUSH_FLAG = 0x00080000,
2247    DTS_FLAG     = 0x00200000,
2248    ACPI_FLAG    = 0x00400000,
2249    MMX_FLAG     = 0x00800000,
2250    FXSR_FLAG    = 0x01000000,
2251    SSE_FLAG     = 0x02000000,
2252    SSE2_FLAG    = 0x04000000,
2253    SS_FLAG      = 0x08000000,
2254    HTT_FLAG     = 0x10000000,
2255    TM_FLAG      = 0x20000000
2256 } FeatureEdxFlag;
2257 
2258 static BufferBlob* cpuid_brand_string_stub_blob;
2259 static const int   cpuid_brand_string_stub_size = 550;
2260 
2261 extern "C" {
2262   typedef void (*getCPUIDBrandString_stub_t)(void*);
2263 }
2264 
2265 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2266 
2267 // VM_Version statics
2268 enum {
2269   ExtendedFamilyIdLength_INTEL = 16,
2270   ExtendedFamilyIdLength_AMD   = 24
2271 };
2272 
2273 const size_t VENDOR_LENGTH = 13;
2274 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2275 static char* _cpu_brand_string = nullptr;
2276 static int64_t _max_qualified_cpu_frequency = 0;
2277 
2278 static int _no_of_threads = 0;
2279 static int _no_of_cores = 0;
2280 
2281 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2282   "8086/8088",
2283   "",
2284   "286",
2285   "386",
2286   "486",
2287   "Pentium",
2288   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2289   "",
2290   "",
2291   "",
2292   "",
2293   "",
2294   "",
2295   "",
2296   "",
2297   "Pentium 4"
2298 };
2299 
2300 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2301   "",
2302   "",
2303   "",
2304   "",
2305   "5x86",
2306   "K5/K6",
2307   "Athlon/AthlonXP",
2308   "",
2309   "",
2310   "",
2311   "",
2312   "",
2313   "",
2314   "",
2315   "",
2316   "Opteron/Athlon64",
2317   "Opteron QC/Phenom",  // Barcelona et.al.
2318   "",
2319   "",
2320   "",
2321   "",
2322   "",
2323   "",
2324   "Zen"
2325 };
2326 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2327 // September 2013, Vol 3C Table 35-1
2328 const char* const _model_id_pentium_pro[] = {
2329   "",
2330   "Pentium Pro",
2331   "",
2332   "Pentium II model 3",
2333   "",
2334   "Pentium II model 5/Xeon/Celeron",
2335   "Celeron",
2336   "Pentium III/Pentium III Xeon",
2337   "Pentium III/Pentium III Xeon",
2338   "Pentium M model 9",    // Yonah
2339   "Pentium III, model A",
2340   "Pentium III, model B",
2341   "",
2342   "Pentium M model D",    // Dothan
2343   "",
2344   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2345   "",
2346   "",
2347   "",
2348   "",
2349   "",
2350   "",
2351   "Celeron",              // 0x16 Celeron 65nm
2352   "Core 2",               // 0x17 Penryn / Harpertown
2353   "",
2354   "",
2355   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2356   "Atom",                 // 0x1B Z5xx series Silverthorn
2357   "",
2358   "Core 2",               // 0x1D Dunnington (6-core)
2359   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2360   "",
2361   "",
2362   "",
2363   "",
2364   "",
2365   "",
2366   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2367   "",
2368   "",
2369   "",                     // 0x28
2370   "",
2371   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2372   "",
2373   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2374   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2375   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2376   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2377   "",
2378   "",
2379   "",
2380   "",
2381   "",
2382   "",
2383   "",
2384   "",
2385   "",
2386   "",
2387   "Ivy Bridge",           // 0x3a
2388   "",
2389   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2390   "",                     // 0x3d "Next Generation Intel Core Processor"
2391   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2392   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2399   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2400   nullptr
2401 };
2402 
/* Brand ID is for backward compatibility.
 * Newer CPUs use the extended brand string. */
2405 const char* const _brand_id[] = {
2406   "",
2407   "Celeron processor",
2408   "Pentium III processor",
2409   "Intel Pentium III Xeon processor",
2410   "",
2411   "",
2412   "",
2413   "",
2414   "Intel Pentium 4 processor",
2415   nullptr
2416 };
2417 
2418 
2419 const char* const _feature_edx_id[] = {
2420   "On-Chip FPU",
2421   "Virtual Mode Extensions",
2422   "Debugging Extensions",
2423   "Page Size Extensions",
2424   "Time Stamp Counter",
2425   "Model Specific Registers",
2426   "Physical Address Extension",
2427   "Machine Check Exceptions",
2428   "CMPXCHG8B Instruction",
2429   "On-Chip APIC",
2430   "",
2431   "Fast System Call",
2432   "Memory Type Range Registers",
2433   "Page Global Enable",
2434   "Machine Check Architecture",
2435   "Conditional Mov Instruction",
2436   "Page Attribute Table",
2437   "36-bit Page Size Extension",
2438   "Processor Serial Number",
2439   "CLFLUSH Instruction",
2440   "",
2441   "Debug Trace Store feature",
2442   "ACPI registers in MSR space",
2443   "Intel Architecture MMX Technology",
2444   "Fast Float Point Save and Restore",
2445   "Streaming SIMD extensions",
2446   "Streaming SIMD extensions 2",
2447   "Self-Snoop",
2448   "Hyper Threading",
2449   "Thermal Monitor",
2450   "",
2451   "Pending Break Enable"
2452 };
2453 
2454 const char* const _feature_extended_edx_id[] = {
2455   "",
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "SYSCALL/SYSRET",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   "",
2473   "",
2474   "",
2475   "Execute Disable Bit",
2476   "",
2477   "",
2478   "",
2479   "",
2480   "",
2481   "",
2482   "RDTSCP",
2483   "",
2484   "Intel 64 Architecture",
2485   "",
2486   ""
2487 };
2488 
2489 const char* const _feature_ecx_id[] = {
2490   "Streaming SIMD Extensions 3",
2491   "PCLMULQDQ",
2492   "64-bit DS Area",
2493   "MONITOR/MWAIT instructions",
2494   "CPL Qualified Debug Store",
2495   "Virtual Machine Extensions",
2496   "Safer Mode Extensions",
2497   "Enhanced Intel SpeedStep technology",
2498   "Thermal Monitor 2",
2499   "Supplemental Streaming SIMD Extensions 3",
2500   "L1 Context ID",
2501   "",
2502   "Fused Multiply-Add",
2503   "CMPXCHG16B",
2504   "xTPR Update Control",
2505   "Perfmon and Debug Capability",
2506   "",
2507   "Process-context identifiers",
2508   "Direct Cache Access",
2509   "Streaming SIMD extensions 4.1",
2510   "Streaming SIMD extensions 4.2",
2511   "x2APIC",
2512   "MOVBE",
2513   "Popcount instruction",
2514   "TSC-Deadline",
2515   "AESNI",
2516   "XSAVE",
2517   "OSXSAVE",
2518   "AVX",
2519   "F16C",
2520   "RDRAND",
2521   ""
2522 };
2523 
2524 const char* const _feature_extended_ecx_id[] = {
2525   "LAHF/SAHF instruction support",
2526   "Core multi-processor legacy mode",
2527   "",
2528   "",
2529   "",
2530   "Advanced Bit Manipulations: LZCNT",
2531   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2532   "Misaligned SSE mode",
2533   "",
2534   "",
2535   "",
2536   "",
2537   "",
2538   "",
2539   "",
2540   "",
2541   "",
2542   "",
2543   "",
2544   "",
2545   "",
2546   "",
2547   "",
2548   "",
2549   "",
2550   "",
2551   "",
2552   "",
2553   "",
2554   "",
2555   "",
2556   ""
2557 };
2558 
2559 void VM_Version::initialize_tsc(void) {
2560   ResourceMark rm;
2561 
2562   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2563   if (cpuid_brand_string_stub_blob == nullptr) {
2564     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2565   }
2566   CodeBuffer c(cpuid_brand_string_stub_blob);
2567   VM_Version_StubGenerator g(&c);
2568   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2569                                    g.generate_getCPUIDBrandString());
2570 }
2571 
2572 const char* VM_Version::cpu_model_description(void) {
2573   uint32_t cpu_family = extended_cpu_family();
2574   uint32_t cpu_model = extended_cpu_model();
2575   const char* model = nullptr;
2576 
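  // Walk the table up to cpu_model; the trailing nullptr sentinel stops the
  // walk for models beyond the last known entry, leaving model == nullptr.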
2577   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2578     for (uint32_t i = 0; i <= cpu_model; i++) {
2579       model = _model_id_pentium_pro[i];
2580       if (model == nullptr) {
2581         break;
2582       }
2583     }
2584   }
2585   return model;
2586 }
2587 
2588 const char* VM_Version::cpu_brand_string(void) {
2589   if (_cpu_brand_string == nullptr) {
2590     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2591     if (nullptr == _cpu_brand_string) {
2592       return nullptr;
2593     }
2594     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2595     if (ret_val != OS_OK) {
2596       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2597       _cpu_brand_string = nullptr;
2598     }
2599   }
2600   return _cpu_brand_string;
2601 }
2602 
2603 const char* VM_Version::cpu_brand(void) {
2604   const char*  brand  = nullptr;
2605 
2606   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2607     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2608     brand = _brand_id[0];
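    // Walk the table up to brand_num, stopping early at the nullptr
    // sentinel if brand_num indexes past the known entries.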
2609     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2610       brand = _brand_id[i];
2611     }
2612   }
2613   return brand;
2614 }
2615 
2616 bool VM_Version::cpu_is_em64t(void) {
2617   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2618 }
2619 
2620 bool VM_Version::is_netburst(void) {
2621   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2622 }
2623 
2624 bool VM_Version::supports_tscinv_ext(void) {
2625   if (!supports_tscinv_bit()) {
2626     return false;
2627   }
2628 
2629   if (is_intel()) {
2630     return true;
2631   }
2632 
2633   if (is_amd()) {
2634     return !is_amd_Barcelona();
2635   }
2636 
2637   if (is_hygon()) {
2638     return true;
2639   }
2640 
2641   return false;
2642 }
2643 
2644 void VM_Version::resolve_cpu_information_details(void) {
2645 
  // In the future we want to base this information on proper cpu and cache
  // topology enumeration, such as Intel 64 Architecture Processor Topology
  // Enumeration, which supports system cpu and cache topology enumeration
  // using either x2APIC IDs or initial APIC IDs.

  // Currently these are only rough estimates, which will not
  // necessarily reflect the exact configuration of the system.

  // This is the number of logical hardware threads
  // visible to the operating system.
2657   _no_of_threads = os::processor_count();
2658 
  // Find out the number of threads per cpu package.
2660   int threads_per_package = threads_per_core() * cores_per_cpu();
2661 
  // Use the number of threads visible to the process to estimate the number of sockets.
2663   _no_of_sockets = _no_of_threads / threads_per_package;
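  // For example, a 2-socket machine with 8 cores per socket and 2 threads
  // per core reports _no_of_threads = 32 and threads_per_package = 16,
  // giving _no_of_sockets = 2 and, below, _no_of_cores = 16.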
2664 
  // The process might only see a subset of the total number of threads
  // from a single processor package (virtualization or resource management,
  // for example). If so, just report a single package.
2668   if (0 == _no_of_sockets) {
2669     _no_of_sockets = 1;
2670   }
2671 
2672   // estimate the number of cores
2673   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2674 }
2675 
2676 
2677 const char* VM_Version::cpu_family_description(void) {
2678   int cpu_family_id = extended_cpu_family();
2679   if (is_amd()) {
2680     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2681       return _family_id_amd[cpu_family_id];
2682     }
2683   }
2684   if (is_intel()) {
2685     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2686       return cpu_model_description();
2687     }
2688     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2689       return _family_id_intel[cpu_family_id];
2690     }
2691   }
2692   if (is_hygon()) {
2693     return "Dhyana";
2694   }
2695   return "Unknown x86";
2696 }
2697 
2698 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2699   assert(buf != nullptr, "buffer is null!");
2700   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2701 
2702   const char* cpu_type = nullptr;
2703   const char* x64 = nullptr;
2704 
2705   if (is_intel()) {
2706     cpu_type = "Intel";
2707     x64 = cpu_is_em64t() ? " Intel64" : "";
2708   } else if (is_amd()) {
2709     cpu_type = "AMD";
2710     x64 = cpu_is_em64t() ? " AMD64" : "";
2711   } else if (is_hygon()) {
2712     cpu_type = "Hygon";
2713     x64 = cpu_is_em64t() ? " AMD64" : "";
2714   } else {
2715     cpu_type = "Unknown x86";
2716     x64 = cpu_is_em64t() ? " x86_64" : "";
2717   }
2718 
2719   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2720     cpu_type,
2721     cpu_family_description(),
2722     supports_ht() ? " (HT)" : "",
2723     supports_sse3() ? " SSE3" : "",
2724     supports_ssse3() ? " SSSE3" : "",
2725     supports_sse4_1() ? " SSE4.1" : "",
2726     supports_sse4_2() ? " SSE4.2" : "",
2727     supports_sse4a() ? " SSE4A" : "",
2728     is_netburst() ? " Netburst" : "",
2729     is_intel_family_core() ? " Core" : "",
2730     x64);
2731 
2732   return OS_OK;
2733 }
2734 
2735 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2736   assert(buf != nullptr, "buffer is null!");
2737   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2738   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2739 
2740   // invoke newly generated asm code to fetch CPU Brand String
2741   getCPUIDBrandString_stub(&_cpuid_info);
2742 
2743   // fetch results into buffer
2744   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2745   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2746   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2747   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2748   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2749   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2750   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2751   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2752   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2753   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2754   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2755   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2756 
2757   return OS_OK;
2758 }
2759 
2760 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2761   guarantee(buf != nullptr, "buffer is null!");
2762   guarantee(buf_len > 0, "buffer len not enough!");
2763 
2764   unsigned int flag = 0;
2765   unsigned int fi = 0;
2766   size_t       written = 0;
2767   const char*  prefix = "";
2768 
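// Append 'string' to buf at offset 'written', separating entries with ", "
// after the first one; if jio_snprintf fails, report the buffer as full.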
2769 #define WRITE_TO_BUF(string)                                                          \
2770   {                                                                                   \
2771     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2772     if (res < 0) {                                                                    \
2773       return buf_len - 1;                                                             \
2774     }                                                                                 \
2775     written += res;                                                                   \
2776     if (prefix[0] == '\0') {                                                          \
2777       prefix = ", ";                                                                  \
2778     }                                                                                 \
2779   }
2780 
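  // Scan bits 0..29 of each feature word, emitting the matching name for
  // every set bit that has a non-empty description in the tables above.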
2781   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2782     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2783       continue; /* no hyperthreading */
2784     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2785       continue; /* no fast system call */
2786     }
2787     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2788       WRITE_TO_BUF(_feature_edx_id[fi]);
2789     }
2790   }
2791 
2792   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2793     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2794       WRITE_TO_BUF(_feature_ecx_id[fi]);
2795     }
2796   }
2797 
2798   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2799     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2800       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2801     }
2802   }
2803 
2804   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2805     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2806       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2807     }
2808   }
2809 
  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }
2813 
2814   return written;
2815 }
2816 
2817 /**
2818  * Write a detailed description of the cpu to a given buffer, including
2819  * feature set.
2820  */
2821 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2822   assert(buf != nullptr, "buffer is null!");
2823   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2824 
2825   static const char* unknown = "<unknown>";
2826   char               vendor_id[VENDOR_LENGTH];
2827   const char*        family = nullptr;
2828   const char*        model = nullptr;
2829   const char*        brand = nullptr;
2830   int                outputLen = 0;
2831 
2832   family = cpu_family_description();
2833   if (family == nullptr) {
2834     family = unknown;
2835   }
2836 
2837   model = cpu_model_description();
2838   if (model == nullptr) {
2839     model = unknown;
2840   }
2841 
2842   brand = cpu_brand_string();
2843 
2844   if (brand == nullptr) {
2845     brand = cpu_brand();
2846     if (brand == nullptr) {
2847       brand = unknown;
2848     }
2849   }
2850 
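  // cpuid leaf 0 returns the vendor string in ebx, edx, ecx order (hence
  // name_0, name_2, name_1), e.g. "GenuineIntel" or "AuthenticAMD".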
2851   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2852   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2853   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2854   vendor_id[VENDOR_LENGTH-1] = '\0';
2855 
2856   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2857     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2858     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2859     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2860     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2861     "Supports: ",
2862     brand,
2863     vendor_id,
2864     family,
2865     extended_cpu_family(),
2866     model,
2867     extended_cpu_model(),
2868     cpu_stepping(),
2869     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2870     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2871     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2872     _cpuid_info.std_cpuid1_eax.value,
2873     _cpuid_info.std_cpuid1_ebx.value,
2874     _cpuid_info.std_cpuid1_ecx.value,
2875     _cpuid_info.std_cpuid1_edx.value,
2876     _cpuid_info.ext_cpuid1_eax,
2877     _cpuid_info.ext_cpuid1_ebx,
2878     _cpuid_info.ext_cpuid1_ecx,
2879     _cpuid_info.ext_cpuid1_edx);
2880 
2881   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2882     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2883     return OS_ERR;
2884   }
2885 
2886   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2887 
2888   return OS_OK;
2889 }
2890 
2891 
2892 // Fill in Abstract_VM_Version statics
2893 void VM_Version::initialize_cpu_information() {
2894   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2895   assert(!_initialized, "shouldn't be initialized yet");
2896   resolve_cpu_information_details();
2897 
2898   // initialize cpu_name and cpu_desc
2899   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2900   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2901   _initialized = true;
2902 }
2903 
2904 /**
2905  *  For information about extracting the frequency from the cpu brand string, please see:
2906  *
2907  *    Intel Processor Identification and the CPUID Instruction
2908  *    Application Note 485
2909  *    May 2012
2910  *
2911  * The return value is the frequency in Hz.
2912  */
2913 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2914   const char* const brand_string = cpu_brand_string();
2915   if (brand_string == nullptr) {
2916     return 0;
2917   }
2918   const int64_t MEGA = 1000000;
2919   int64_t multiplier = 0;
2920   int64_t frequency = 0;
2921   uint8_t idx = 0;
2922   // The brand string buffer is at most 48 bytes.
2923   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2924   for (; idx < 48-2; ++idx) {
2925     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2926     // Search brand string for "yHz" where y is M, G, or T.
2927     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2928       if (brand_string[idx] == 'M') {
2929         multiplier = MEGA;
2930       } else if (brand_string[idx] == 'G') {
2931         multiplier = MEGA * 1000;
2932       } else if (brand_string[idx] == 'T') {
2933         multiplier = MEGA * MEGA;
2934       }
2935       break;
2936     }
2937   }
2938   if (multiplier > 0) {
2939     // Compute frequency (in Hz) from brand string.
2940     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2941       frequency =  (brand_string[idx-4] - '0') * multiplier;
2942       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2943       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2944     } else { // format is "xxxx"
2945       frequency =  (brand_string[idx-4] - '0') * 1000;
2946       frequency += (brand_string[idx-3] - '0') * 100;
2947       frequency += (brand_string[idx-2] - '0') * 10;
2948       frequency += (brand_string[idx-1] - '0');
2949       frequency *= multiplier;
2950     }
2951   }
2952   return frequency;
2953 }
2954 
2955 
2956 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2957   if (_max_qualified_cpu_frequency == 0) {
2958     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2959   }
2960   return _max_qualified_cpu_frequency;
2961 }

uint64_t VM_Version::CpuidInfo::feature_flags() const {
  uint64_t result = 0;
  if (std_cpuid1_edx.bits.cmpxchg8 != 0)
    result |= CPU_CX8;
  if (std_cpuid1_edx.bits.cmov != 0)
    result |= CPU_CMOV;
  if (std_cpuid1_edx.bits.clflush != 0)
    result |= CPU_FLUSH;
#ifdef _LP64
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  assert((result & CPU_FLUSH) != 0, "clflush should be available");
#endif
  if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.fxsr != 0))
    result |= CPU_FXSR;
  // The HT flag is set for multi-core processors as well.
  if (threads_per_core() > 1)
    result |= CPU_HT;
  if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
      ext_cpuid1_edx.bits.mmx != 0))
    result |= CPU_MMX;
  if (std_cpuid1_edx.bits.sse != 0)
    result |= CPU_SSE;
  if (std_cpuid1_edx.bits.sse2 != 0)
    result |= CPU_SSE2;
  if (std_cpuid1_ecx.bits.sse3 != 0)
    result |= CPU_SSE3;
  if (std_cpuid1_ecx.bits.ssse3 != 0)
    result |= CPU_SSSE3;
  if (std_cpuid1_ecx.bits.sse4_1 != 0)
    result |= CPU_SSE4_1;
  if (std_cpuid1_ecx.bits.sse4_2 != 0)
    result |= CPU_SSE4_2;
  if (std_cpuid1_ecx.bits.popcnt != 0)
    result |= CPU_POPCNT;
  if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
      xem_xcr0_eax.bits.apx_f != 0) {
    result |= CPU_APX_F;
  }
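  // Explanatory note: AVX is reported only when the CPU advertises it and
  // the OS has enabled XSAVE (OSXSAVE) with the SSE and YMM state bits set
  // in XCR0; without OS support the upper halves of the ymm registers are
  // not preserved across context switches.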
  if (std_cpuid1_ecx.bits.avx != 0 &&
      std_cpuid1_ecx.bits.osxsave != 0 &&
      xem_xcr0_eax.bits.sse != 0 &&
      xem_xcr0_eax.bits.ymm != 0) {
    result |= CPU_AVX;
    result |= CPU_VZEROUPPER;
    if (std_cpuid1_ecx.bits.f16c != 0)
      result |= CPU_F16C;
    if (sef_cpuid7_ebx.bits.avx2 != 0) {
      result |= CPU_AVX2;
      if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
        result |= CPU_AVX_IFMA;
    }
    if (sef_cpuid7_ecx.bits.gfni != 0)
      result |= CPU_GFNI;
    if (sef_cpuid7_ebx.bits.avx512f != 0 &&
        xem_xcr0_eax.bits.opmask != 0 &&
        xem_xcr0_eax.bits.zmm512 != 0 &&
        xem_xcr0_eax.bits.zmm32 != 0) {
      result |= CPU_AVX512F;
      if (sef_cpuid7_ebx.bits.avx512cd != 0)
        result |= CPU_AVX512CD;
      if (sef_cpuid7_ebx.bits.avx512dq != 0)
        result |= CPU_AVX512DQ;
      if (sef_cpuid7_ebx.bits.avx512ifma != 0)
        result |= CPU_AVX512_IFMA;
      if (sef_cpuid7_ebx.bits.avx512pf != 0)
        result |= CPU_AVX512PF;
      if (sef_cpuid7_ebx.bits.avx512er != 0)
        result |= CPU_AVX512ER;
      if (sef_cpuid7_ebx.bits.avx512bw != 0)
        result |= CPU_AVX512BW;
      if (sef_cpuid7_ebx.bits.avx512vl != 0)
        result |= CPU_AVX512VL;
      if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
        result |= CPU_AVX512_VPOPCNTDQ;
      if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
        result |= CPU_AVX512_VPCLMULQDQ;
      if (sef_cpuid7_ecx.bits.vaes != 0)
        result |= CPU_AVX512_VAES;
      if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
        result |= CPU_AVX512_VNNI;
      if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
        result |= CPU_AVX512_BITALG;
      if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
        result |= CPU_AVX512_VBMI;
      if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
        result |= CPU_AVX512_VBMI2;
    }
  }
  if (std_cpuid1_ecx.bits.hv != 0)
    result |= CPU_HV;
  if (sef_cpuid7_ebx.bits.bmi1 != 0)
    result |= CPU_BMI1;
  if (std_cpuid1_edx.bits.tsc != 0)
    result |= CPU_TSC;
  if (ext_cpuid7_edx.bits.tsc_invariance != 0)
    result |= CPU_TSCINV_BIT;
  if (std_cpuid1_ecx.bits.aes != 0)
    result |= CPU_AES;
  if (sef_cpuid7_ebx.bits.erms != 0)
    result |= CPU_ERMS;
  if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
    result |= CPU_FSRM;
  if (std_cpuid1_ecx.bits.clmul != 0)
    result |= CPU_CLMUL;
  if (sef_cpuid7_ebx.bits.rtm != 0)
    result |= CPU_RTM;
  if (sef_cpuid7_ebx.bits.adx != 0)
    result |= CPU_ADX;
  if (sef_cpuid7_ebx.bits.bmi2 != 0)
    result |= CPU_BMI2;
  if (sef_cpuid7_ebx.bits.sha != 0)
    result |= CPU_SHA;
  if (std_cpuid1_ecx.bits.fma != 0)
    result |= CPU_FMA;
  if (sef_cpuid7_ebx.bits.clflushopt != 0)
    result |= CPU_FLUSHOPT;
  if (ext_cpuid1_edx.bits.rdtscp != 0)
    result |= CPU_RDTSCP;
  if (sef_cpuid7_ecx.bits.rdpid != 0)
    result |= CPU_RDPID;

  // AMD|Hygon features.
  if (is_amd_family()) {
    if ((ext_cpuid1_edx.bits.tdnow != 0) ||
        (ext_cpuid1_ecx.bits.prefetchw != 0))
      result |= CPU_3DNOW_PREFETCH;
    if (ext_cpuid1_ecx.bits.lzcnt != 0)
      result |= CPU_LZCNT;
    if (ext_cpuid1_ecx.bits.sse4a != 0)
      result |= CPU_SSE4A;
  }

  // Intel features.
  if (is_intel()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
    if (sef_cpuid7_ebx.bits.clwb != 0) {
      result |= CPU_CLWB;
    }
    if (sef_cpuid7_edx.bits.serialize != 0) {
      result |= CPU_SERIALIZE;
    }
  }

  // ZX features.
  if (is_zx()) {
    if (ext_cpuid1_ecx.bits.lzcnt != 0) {
      result |= CPU_LZCNT;
    }
    if (ext_cpuid1_ecx.bits.prefetchw != 0) {
      result |= CPU_3DNOW_PREFETCH;
    }
  }

  // Protection key features.
  if (sef_cpuid7_ecx.bits.pku != 0) {
    result |= CPU_PKU;
  }
  if (sef_cpuid7_ecx.bits.ospke != 0) {
    result |= CPU_OSPKE;
  }

  // Control flow enforcement (CET) features.
  if (sef_cpuid7_ecx.bits.cet_ss != 0) {
    result |= CPU_CET_SS;
  }
  if (sef_cpuid7_edx.bits.cet_ibt != 0) {
    result |= CPU_CET_IBT;
  }

  // Composite features.
  if (supports_tscinv_bit() &&
      ((is_amd_family() && !is_amd_Barcelona()) ||
       is_intel_tsc_synched_at_init())) {
    result |= CPU_TSCINV;
  }

  return result;
}

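// Background (best-effort summary): the CPU detection stub primes the
// ymm/zmm registers with ymm_test_value() before triggering a signal and
// stores them to ymm_save/zmm_save afterwards. If the OS does not save and
// restore the full register state during signal handling, the pattern comes
// back corrupted and AVX/EVEX code generation must be avoided.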
bool VM_Version::os_supports_avx_vectors() {
  bool retVal = false;
  int nreg = 2 LP64_ONLY(+2);
  if (supports_evex()) {
    // Verify that the OS saves/restores all bits of EVEX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
      if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
  } else if (supports_avx()) {
    // Verify that the OS saves/restores all bits of AVX registers
    // during signal processing.
    retVal = true;
    for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
      if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
        retVal = false;
        break;
      }
    }
    // zmm_save will be set on an EVEX-enabled machine even if we choose AVX code gen.
    if (!retVal) {
      // Verify that the OS saves/restores all bits of EVEX registers
      // during signal processing.
      retVal = true;
      for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
        if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
          retVal = false;
          break;
        }
      }
    }
  }
  return retVal;
}

bool VM_Version::os_supports_apx_egprs() {
  if (!supports_apx_f()) {
    return false;
  }
  // Enable APX support for product builds after
  // completion of planned features listed in JDK-8329030.
#if !defined(PRODUCT)
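  // Analogous to os_supports_avx_vectors(): the detection stub stores two
  // extended GPRs primed with egpr_test_value() into apx_save after the
  // APX-specific SEGV probe; if the OS did not preserve APX state across
  // signal handling, the values differ.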
  if (_cpuid_info.apx_save[0] != egpr_test_value() ||
      _cpuid_info.apx_save[1] != egpr_test_value()) {
    return false;
  }
  return true;
#else
  return false;
#endif
}

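// Derivation note: with CPUID leaf 0xB topology, sub-leaf 0 reports the
// number of logical processors per core (SMT level) and sub-leaf 1 the
// number per package, so the quotient computed below is cores per package.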
uint VM_Version::cores_per_cpu() {
  uint result = 1;
  if (is_intel()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  } else if (is_amd_family()) {
    result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
  } else if (is_zx()) {
    bool supports_topology = supports_processor_topology();
    if (supports_topology) {
      result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
               _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
    }
    if (!supports_topology || result == 0) {
      result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
    }
  }
  return result;
}

uint VM_Version::threads_per_core() {
  uint result = 1;
  if (is_intel() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (is_zx() && supports_processor_topology()) {
    result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
  } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
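    // On AMD, family 0x17 (Zen) and later report the per-core thread count
    // directly in CPUID Fn8000_001E[EBX]; older parts derive it from the
    // per-package logical processor count divided by the core count.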
    if (cpu_family() >= 0x17) {
      result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
    } else {
      result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
                 cores_per_cpu();
    }
  }
  return (result == 0 ? 1 : result);
}

uint VM_Version::L1_line_size() {
  uint result = 0;
  if (is_intel()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  } else if (is_amd_family()) {
    result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
  } else if (is_zx()) {
    result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
  }
  if (result < 32) // not reported by cpuid?
    result = 32;   // default to 32 bytes, the conventional x86/x64 minimum
  return result;
}

bool VM_Version::is_intel_tsc_synched_at_init() {
  if (is_intel_family_core()) {
    uint32_t ext_model = extended_cpu_model();
    if (ext_model == CPU_MODEL_NEHALEM_EP     ||
        ext_model == CPU_MODEL_WESTMERE_EP    ||
        ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
        ext_model == CPU_MODEL_IVYBRIDGE_EP) {
      // These EP parts support invariant TSC in systems with at most two
      // sockets. The EX versions are usually used in systems with more than
      // two sockets and likely don't synchronize TSCs at initialization.
      // Code that uses TSC values must be prepared for them to arbitrarily
      // jump forward or backward.
      return true;
    }
  }
  return false;
}

int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
  // Hardware prefetching (distance/size in bytes):
  // Pentium 3 -  64 /  32
  // Pentium 4 - 256 / 128
  // Athlon    -  64 /  32 ????
  // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
  // Core      - 128 /  64
  //
  // Software prefetching (distance in bytes / instruction with best score):
  // Pentium 3 - 128 / prefetchnta
  // Pentium 4 - 512 / prefetchnta
  // Athlon    - 128 / prefetchnta
  // Opteron   - 256 / prefetchnta
  // Core      - 256 / prefetchnta
  // The returned distance is used only when AllocatePrefetchStyle > 0.

  if (is_amd_family()) { // AMD | Hygon
    if (supports_sse2()) {
      return 256; // Opteron
    } else {
      return 128; // Athlon
    }
  } else { // Intel
    if (supports_sse3() && cpu_family() == 6) {
      if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
        return 192;
      } else if (use_watermark_prefetch) { // watermark prefetching on Core
#ifdef _LP64
        return 384;
#else
        return 320;
#endif
      }
    }
    if (supports_sse2()) {
      if (cpu_family() == 6) {
        return 256; // Pentium M, Core, Core2
      } else {
        return 512; // Pentium 4
      }
    } else {
      return 128; // Pentium 3 (and all other old CPUs)
    }
  }
}

bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
  switch (id) {
  case vmIntrinsics::_floatToFloat16:
  case vmIntrinsics::_float16ToFloat:
    if (!supports_float16()) {
      return false;
    }
    break;
  default:
    break;
  }
  return true;
}