1 /*
   2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jvm.h"
  27 #include "asm/macroAssembler.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/powerOfTwo.hpp"
  41 #include "utilities/virtualizationSupport.hpp"
  42 
  43 int VM_Version::_cpu;
  44 int VM_Version::_model;
  45 int VM_Version::_stepping;
  46 bool VM_Version::_has_intel_jcc_erratum;
  47 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  48 
  49 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  50 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  51 #undef DECLARE_CPU_FEATURE_FLAG
  52 
  53 // Address of instruction which causes SEGV
  54 address VM_Version::_cpuinfo_segv_addr = 0;
  55 // Address of instruction after the one which causes SEGV
  56 address VM_Version::_cpuinfo_cont_addr = 0;
  57 
  58 static BufferBlob* stub_blob;
  59 static const int stub_size = 2000;
  60 
  61 extern "C" {
  62   typedef void (*get_cpu_info_stub_t)(void*);
  63   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  64 }
  65 static get_cpu_info_stub_t get_cpu_info_stub = NULL;
  66 static detect_virt_stub_t detect_virt_stub = NULL;
  67 
  68 #ifdef _LP64
  69 
  70 bool VM_Version::supports_clflush() {
  71   // clflush should always be available on x86_64
  72   // if not we are in real trouble because we rely on it
  73   // to flush the code cache.
  74   // Unfortunately, Assembler::clflush is currently called as part
  75   // of generation of the code cache flush routine. This happens
  76   // under Universe::init before the processor features are set
  77   // up. Assembler::flush calls this routine to check that clflush
  78   // is allowed. So, we give the caller a free pass if Universe init
  79   // is still in progress.
  80   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  81   return true;
  82 }
  83 #endif
  84 
  85 #define CPUID_STANDARD_FN   0x0
  86 #define CPUID_STANDARD_FN_1 0x1
  87 #define CPUID_STANDARD_FN_4 0x4
  88 #define CPUID_STANDARD_FN_B 0xb
  89 
  90 #define CPUID_EXTENDED_FN   0x80000000
  91 #define CPUID_EXTENDED_FN_1 0x80000001
  92 #define CPUID_EXTENDED_FN_2 0x80000002
  93 #define CPUID_EXTENDED_FN_3 0x80000003
  94 #define CPUID_EXTENDED_FN_4 0x80000004
  95 #define CPUID_EXTENDED_FN_7 0x80000007
  96 #define CPUID_EXTENDED_FN_8 0x80000008
  97 
  98 class VM_Version_StubGenerator: public StubCodeGenerator {
  99  public:
 100 
 101   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 102 
 103   address generate_get_cpu_info() {
 104     // Flags to test CPU type.
 105     const uint32_t HS_EFL_AC = 0x40000;
 106     const uint32_t HS_EFL_ID = 0x200000;
 107     // Values for when we don't have a CPUID instruction.
 108     const int      CPU_FAMILY_SHIFT = 8;
 109     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 110     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 111     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 112 
 113     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 114     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 115     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 116 
 117     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 118 #   define __ _masm->
 119 
 120     address start = __ pc();
 121 
 122     //
 123     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 124     //
 125     // LP64: rcx and rdx are first and second argument registers on windows
 126 
 127     __ push(rbp);
 128 #ifdef _LP64
 129     __ mov(rbp, c_rarg0); // cpuid_info address
 130 #else
 131     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 132 #endif
 133     __ push(rbx);
 134     __ push(rsi);
 135     __ pushf();          // preserve rbx, and flags
 136     __ pop(rax);
 137     __ push(rax);
 138     __ mov(rcx, rax);
 139     //
 140     // if we are unable to change the AC flag, we have a 386
 141     //
 142     __ xorl(rax, HS_EFL_AC);
 143     __ push(rax);
 144     __ popf();
 145     __ pushf();
 146     __ pop(rax);
 147     __ cmpptr(rax, rcx);
 148     __ jccb(Assembler::notEqual, detect_486);
 149 
 150     __ movl(rax, CPU_FAMILY_386);
 151     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 152     __ jmp(done);
 153 
 154     //
 155     // If we are unable to change the ID flag, we have a 486 which does
 156     // not support the "cpuid" instruction.
 157     //
 158     __ bind(detect_486);
 159     __ mov(rax, rcx);
 160     __ xorl(rax, HS_EFL_ID);
 161     __ push(rax);
 162     __ popf();
 163     __ pushf();
 164     __ pop(rax);
 165     __ cmpptr(rcx, rax);
 166     __ jccb(Assembler::notEqual, detect_586);
 167 
 168     __ bind(cpu486);
 169     __ movl(rax, CPU_FAMILY_486);
 170     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 171     __ jmp(done);
 172 
 173     //
 174     // At this point, we have a chip which supports the "cpuid" instruction
 175     //
 176     __ bind(detect_586);
 177     __ xorl(rax, rax);
 178     __ cpuid();
 179     __ orl(rax, rax);
 180     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 181                                         // value of at least 1, we give up and
 182                                         // assume a 486
 183     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 184     __ movl(Address(rsi, 0), rax);
 185     __ movl(Address(rsi, 4), rbx);
 186     __ movl(Address(rsi, 8), rcx);
 187     __ movl(Address(rsi,12), rdx);
 188 
 189     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 190     __ jccb(Assembler::belowEqual, std_cpuid4);
 191 
 192     //
 193     // cpuid(0xB) Processor Topology
 194     //
 195     __ movl(rax, 0xb);
 196     __ xorl(rcx, rcx);   // Threads level
 197     __ cpuid();
 198 
 199     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 200     __ movl(Address(rsi, 0), rax);
 201     __ movl(Address(rsi, 4), rbx);
 202     __ movl(Address(rsi, 8), rcx);
 203     __ movl(Address(rsi,12), rdx);
 204 
 205     __ movl(rax, 0xb);
 206     __ movl(rcx, 1);     // Cores level
 207     __ cpuid();
 208     __ push(rax);
 209     __ andl(rax, 0x1f);  // Determine if valid topology level
 210     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 211     __ andl(rax, 0xffff);
 212     __ pop(rax);
 213     __ jccb(Assembler::equal, std_cpuid4);
 214 
 215     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 216     __ movl(Address(rsi, 0), rax);
 217     __ movl(Address(rsi, 4), rbx);
 218     __ movl(Address(rsi, 8), rcx);
 219     __ movl(Address(rsi,12), rdx);
 220 
 221     __ movl(rax, 0xb);
 222     __ movl(rcx, 2);     // Packages level
 223     __ cpuid();
 224     __ push(rax);
 225     __ andl(rax, 0x1f);  // Determine if valid topology level
 226     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 227     __ andl(rax, 0xffff);
 228     __ pop(rax);
 229     __ jccb(Assembler::equal, std_cpuid4);
 230 
 231     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 232     __ movl(Address(rsi, 0), rax);
 233     __ movl(Address(rsi, 4), rbx);
 234     __ movl(Address(rsi, 8), rcx);
 235     __ movl(Address(rsi,12), rdx);
 236 
 237     //
 238     // cpuid(0x4) Deterministic cache params
 239     //
 240     __ bind(std_cpuid4);
 241     __ movl(rax, 4);
 242     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 243     __ jccb(Assembler::greater, std_cpuid1);
 244 
 245     __ xorl(rcx, rcx);   // L1 cache
 246     __ cpuid();
 247     __ push(rax);
 248     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 249     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 250     __ pop(rax);
 251     __ jccb(Assembler::equal, std_cpuid1);
 252 
 253     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 254     __ movl(Address(rsi, 0), rax);
 255     __ movl(Address(rsi, 4), rbx);
 256     __ movl(Address(rsi, 8), rcx);
 257     __ movl(Address(rsi,12), rdx);
 258 
 259     //
 260     // Standard cpuid(0x1)
 261     //
 262     __ bind(std_cpuid1);
 263     __ movl(rax, 1);
 264     __ cpuid();
 265     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 266     __ movl(Address(rsi, 0), rax);
 267     __ movl(Address(rsi, 4), rbx);
 268     __ movl(Address(rsi, 8), rcx);
 269     __ movl(Address(rsi,12), rdx);
 270 
 271     //
 272     // Check if OS has enabled XGETBV instruction to access XCR0
 273     // (OSXSAVE feature flag) and CPU supports AVX
 274     //
 275     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 276     __ cmpl(rcx, 0x18000000);
 277     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 278 
 279     //
 280     // XCR0, XFEATURE_ENABLED_MASK register
 281     //
 282     __ xorl(rcx, rcx);   // zero for XCR0 register
 283     __ xgetbv();
 284     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 285     __ movl(Address(rsi, 0), rax);
 286     __ movl(Address(rsi, 4), rdx);
 287 
 288     //
 289     // cpuid(0x7) Structured Extended Features
 290     //
 291     __ bind(sef_cpuid);
 292     __ movl(rax, 7);
 293     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 294     __ jccb(Assembler::greater, ext_cpuid);
 295 
 296     __ xorl(rcx, rcx);
 297     __ cpuid();
 298     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 299     __ movl(Address(rsi, 0), rax);
 300     __ movl(Address(rsi, 4), rbx);
 301     __ movl(Address(rsi, 8), rcx);
 302     __ movl(Address(rsi, 12), rdx);
 303 
 304     //
 305     // Extended cpuid(0x80000000)
 306     //
 307     __ bind(ext_cpuid);
 308     __ movl(rax, 0x80000000);
 309     __ cpuid();
 310     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 311     __ jcc(Assembler::belowEqual, done);
 312     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 313     __ jcc(Assembler::belowEqual, ext_cpuid1);
 314     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 315     __ jccb(Assembler::belowEqual, ext_cpuid5);
 316     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 317     __ jccb(Assembler::belowEqual, ext_cpuid7);
 318     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 319     __ jccb(Assembler::belowEqual, ext_cpuid8);
 320     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 321     __ jccb(Assembler::below, ext_cpuid8);
 322     //
 323     // Extended cpuid(0x8000001E)
 324     //
 325     __ movl(rax, 0x8000001E);
 326     __ cpuid();
 327     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 328     __ movl(Address(rsi, 0), rax);
 329     __ movl(Address(rsi, 4), rbx);
 330     __ movl(Address(rsi, 8), rcx);
 331     __ movl(Address(rsi,12), rdx);
 332 
 333     //
 334     // Extended cpuid(0x80000008)
 335     //
 336     __ bind(ext_cpuid8);
 337     __ movl(rax, 0x80000008);
 338     __ cpuid();
 339     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 340     __ movl(Address(rsi, 0), rax);
 341     __ movl(Address(rsi, 4), rbx);
 342     __ movl(Address(rsi, 8), rcx);
 343     __ movl(Address(rsi,12), rdx);
 344 
 345     //
 346     // Extended cpuid(0x80000007)
 347     //
 348     __ bind(ext_cpuid7);
 349     __ movl(rax, 0x80000007);
 350     __ cpuid();
 351     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 352     __ movl(Address(rsi, 0), rax);
 353     __ movl(Address(rsi, 4), rbx);
 354     __ movl(Address(rsi, 8), rcx);
 355     __ movl(Address(rsi,12), rdx);
 356 
 357     //
 358     // Extended cpuid(0x80000005)
 359     //
 360     __ bind(ext_cpuid5);
 361     __ movl(rax, 0x80000005);
 362     __ cpuid();
 363     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 364     __ movl(Address(rsi, 0), rax);
 365     __ movl(Address(rsi, 4), rbx);
 366     __ movl(Address(rsi, 8), rcx);
 367     __ movl(Address(rsi,12), rdx);
 368 
 369     //
 370     // Extended cpuid(0x80000001)
 371     //
 372     __ bind(ext_cpuid1);
 373     __ movl(rax, 0x80000001);
 374     __ cpuid();
 375     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 376     __ movl(Address(rsi, 0), rax);
 377     __ movl(Address(rsi, 4), rbx);
 378     __ movl(Address(rsi, 8), rcx);
 379     __ movl(Address(rsi,12), rdx);
 380 
 381     //
 382     // Check if OS has enabled XGETBV instruction to access XCR0
 383     // (OSXSAVE feature flag) and CPU supports AVX
 384     //
 385     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 386     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 387     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 388     __ cmpl(rcx, 0x18000000);
 389     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 390 
 391     __ movl(rax, 0x6);
 392     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 393     __ cmpl(rax, 0x6);
 394     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 395 
 396     // we need to bridge farther than imm8, so we use this island as a thunk
 397     __ bind(done);
 398     __ jmp(wrapup);
 399 
 400     __ bind(start_simd_check);
 401     //
 402     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 403     // registers are not restored after a signal processing.
 404     // Generate SEGV here (reference through NULL)
 405     // and check upper YMM/ZMM bits after it.
 406     //
 407     intx saved_useavx = UseAVX;
 408     intx saved_usesse = UseSSE;
 409 
 410     // If UseAVX is uninitialized or is set by the user to include EVEX
 411     if (use_evex) {
 412       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 413       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 414       __ movl(rax, 0x10000);
 415       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 416       __ cmpl(rax, 0x10000);
 417       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 418       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 419       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 420       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 421       __ movl(rax, 0xE0);
 422       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 423       __ cmpl(rax, 0xE0);
 424       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 425 
 426       if (FLAG_IS_DEFAULT(UseAVX)) {
 427         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 428         __ movl(rax, Address(rsi, 0));
 429         __ cmpl(rax, 0x50654);              // If it is Skylake
 430         __ jcc(Assembler::equal, legacy_setup);
 431       }
 432       // EVEX setup: run in lowest evex mode
 433       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 434       UseAVX = 3;
 435       UseSSE = 2;
 436 #ifdef _WINDOWS
 437       // xmm5-xmm15 are not preserved by caller on windows
 438       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 439       __ subptr(rsp, 64);
 440       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 441 #ifdef _LP64
 442       __ subptr(rsp, 64);
 443       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 444       __ subptr(rsp, 64);
 445       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 446 #endif // _LP64
 447 #endif // _WINDOWS
 448 
 449       // load value into all 64 bytes of zmm7 register
 450       __ movl(rcx, VM_Version::ymm_test_value());
 451       __ movdl(xmm0, rcx);
 452       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 453       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 454 #ifdef _LP64
 455       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 456       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 457 #endif
 458       VM_Version::clean_cpuFeatures();
 459       __ jmp(save_restore_except);
 460     }
 461 
 462     __ bind(legacy_setup);
 463     // AVX setup
 464     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 465     UseAVX = 1;
 466     UseSSE = 2;
 467 #ifdef _WINDOWS
 468     __ subptr(rsp, 32);
 469     __ vmovdqu(Address(rsp, 0), xmm7);
 470 #ifdef _LP64
 471     __ subptr(rsp, 32);
 472     __ vmovdqu(Address(rsp, 0), xmm8);
 473     __ subptr(rsp, 32);
 474     __ vmovdqu(Address(rsp, 0), xmm15);
 475 #endif // _LP64
 476 #endif // _WINDOWS
 477 
 478     // load value into all 32 bytes of ymm7 register
 479     __ movl(rcx, VM_Version::ymm_test_value());
 480 
 481     __ movdl(xmm0, rcx);
 482     __ pshufd(xmm0, xmm0, 0x00);
 483     __ vinsertf128_high(xmm0, xmm0);
 484     __ vmovdqu(xmm7, xmm0);
 485 #ifdef _LP64
 486     __ vmovdqu(xmm8, xmm0);
 487     __ vmovdqu(xmm15, xmm0);
 488 #endif
 489     VM_Version::clean_cpuFeatures();
 490 
 491     __ bind(save_restore_except);
 492     __ xorl(rsi, rsi);
 493     VM_Version::set_cpuinfo_segv_addr(__ pc());
 494     // Generate SEGV
 495     __ movl(rax, Address(rsi, 0));
 496 
 497     VM_Version::set_cpuinfo_cont_addr(__ pc());
 498     // Returns here after signal. Save xmm0 to check it later.
 499 
 500     // If UseAVX is uninitialized or is set by the user to include EVEX
 501     if (use_evex) {
 502       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 503       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 504       __ movl(rax, 0x10000);
 505       __ andl(rax, Address(rsi, 4));
 506       __ cmpl(rax, 0x10000);
 507       __ jcc(Assembler::notEqual, legacy_save_restore);
 508       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 509       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 510       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 511       __ movl(rax, 0xE0);
 512       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 513       __ cmpl(rax, 0xE0);
 514       __ jcc(Assembler::notEqual, legacy_save_restore);
 515 
 516       if (FLAG_IS_DEFAULT(UseAVX)) {
 517         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 518         __ movl(rax, Address(rsi, 0));
 519         __ cmpl(rax, 0x50654);              // If it is Skylake
 520         __ jcc(Assembler::equal, legacy_save_restore);
 521       }
 522       // EVEX check: run in lowest evex mode
 523       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 524       UseAVX = 3;
 525       UseSSE = 2;
 526       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 527       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 528       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 529 #ifdef _LP64
 530       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 531       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 532 #endif
 533 
 534 #ifdef _WINDOWS
 535 #ifdef _LP64
 536       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 537       __ addptr(rsp, 64);
 538       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 539       __ addptr(rsp, 64);
 540 #endif // _LP64
 541       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 542       __ addptr(rsp, 64);
 543 #endif // _WINDOWS
 544       generate_vzeroupper(wrapup);
 545       VM_Version::clean_cpuFeatures();
 546       UseAVX = saved_useavx;
 547       UseSSE = saved_usesse;
 548       __ jmp(wrapup);
 549    }
 550 
 551     __ bind(legacy_save_restore);
 552     // AVX check
 553     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 554     UseAVX = 1;
 555     UseSSE = 2;
 556     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 557     __ vmovdqu(Address(rsi, 0), xmm0);
 558     __ vmovdqu(Address(rsi, 32), xmm7);
 559 #ifdef _LP64
 560     __ vmovdqu(Address(rsi, 64), xmm8);
 561     __ vmovdqu(Address(rsi, 96), xmm15);
 562 #endif
 563 
 564 #ifdef _WINDOWS
 565 #ifdef _LP64
 566     __ vmovdqu(xmm15, Address(rsp, 0));
 567     __ addptr(rsp, 32);
 568     __ vmovdqu(xmm8, Address(rsp, 0));
 569     __ addptr(rsp, 32);
 570 #endif // _LP64
 571     __ vmovdqu(xmm7, Address(rsp, 0));
 572     __ addptr(rsp, 32);
 573 #endif // _WINDOWS
 574     generate_vzeroupper(wrapup);
 575     VM_Version::clean_cpuFeatures();
 576     UseAVX = saved_useavx;
 577     UseSSE = saved_usesse;
 578 
 579     __ bind(wrapup);
 580     __ popf();
 581     __ pop(rsi);
 582     __ pop(rbx);
 583     __ pop(rbp);
 584     __ ret(0);
 585 
 586 #   undef __
 587 
 588     return start;
 589   };
 590   void generate_vzeroupper(Label& L_wrapup) {
 591 #   define __ _masm->
 592     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 593     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 594     __ jcc(Assembler::notEqual, L_wrapup);
 595     __ movl(rcx, 0x0FFF0FF0);
 596     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 597     __ andl(rcx, Address(rsi, 0));
 598     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 599     __ jcc(Assembler::equal, L_wrapup);
 600     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 601     __ jcc(Assembler::equal, L_wrapup);
 602     // vzeroupper() will use a pre-computed instruction sequence that we
 603     // can't compute until after we've determined CPU capabilities. Use
 604     // uncached variant here directly to be able to bootstrap correctly
 605     __ vzeroupper_uncached();
 606 #   undef __
 607   }
 608   address generate_detect_virt() {
 609     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 610 #   define __ _masm->
 611 
 612     address start = __ pc();
 613 
 614     // Evacuate callee-saved registers
 615     __ push(rbp);
 616     __ push(rbx);
 617     __ push(rsi); // for Windows
 618 
 619 #ifdef _LP64
 620     __ mov(rax, c_rarg0); // CPUID leaf
 621     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 622 #else
 623     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 624     __ movptr(rsi, Address(rsp, 20)); // register array address
 625 #endif
 626 
 627     __ cpuid();
 628 
 629     // Store result to register array
 630     __ movl(Address(rsi,  0), rax);
 631     __ movl(Address(rsi,  4), rbx);
 632     __ movl(Address(rsi,  8), rcx);
 633     __ movl(Address(rsi, 12), rdx);
 634 
 635     // Epilogue
 636     __ pop(rsi);
 637     __ pop(rbx);
 638     __ pop(rbp);
 639     __ ret(0);
 640 
 641 #   undef __
 642 
 643     return start;
 644   };
 645 
 646 
 647   address generate_getCPUIDBrandString(void) {
 648     // Flags to test CPU type.
 649     const uint32_t HS_EFL_AC           = 0x40000;
 650     const uint32_t HS_EFL_ID           = 0x200000;
 651     // Values for when we don't have a CPUID instruction.
 652     const int      CPU_FAMILY_SHIFT = 8;
 653     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 654     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 655 
 656     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 657 
 658     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 659 #   define __ _masm->
 660 
 661     address start = __ pc();
 662 
 663     //
 664     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 665     //
 666     // LP64: rcx and rdx are first and second argument registers on windows
 667 
 668     __ push(rbp);
 669 #ifdef _LP64
 670     __ mov(rbp, c_rarg0); // cpuid_info address
 671 #else
 672     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 673 #endif
 674     __ push(rbx);
 675     __ push(rsi);
 676     __ pushf();          // preserve rbx, and flags
 677     __ pop(rax);
 678     __ push(rax);
 679     __ mov(rcx, rax);
 680     //
 681     // if we are unable to change the AC flag, we have a 386
 682     //
 683     __ xorl(rax, HS_EFL_AC);
 684     __ push(rax);
 685     __ popf();
 686     __ pushf();
 687     __ pop(rax);
 688     __ cmpptr(rax, rcx);
 689     __ jccb(Assembler::notEqual, detect_486);
 690 
 691     __ movl(rax, CPU_FAMILY_386);
 692     __ jmp(done);
 693 
 694     //
 695     // If we are unable to change the ID flag, we have a 486 which does
 696     // not support the "cpuid" instruction.
 697     //
 698     __ bind(detect_486);
 699     __ mov(rax, rcx);
 700     __ xorl(rax, HS_EFL_ID);
 701     __ push(rax);
 702     __ popf();
 703     __ pushf();
 704     __ pop(rax);
 705     __ cmpptr(rcx, rax);
 706     __ jccb(Assembler::notEqual, detect_586);
 707 
 708     __ bind(cpu486);
 709     __ movl(rax, CPU_FAMILY_486);
 710     __ jmp(done);
 711 
 712     //
 713     // At this point, we have a chip which supports the "cpuid" instruction
 714     //
 715     __ bind(detect_586);
 716     __ xorl(rax, rax);
 717     __ cpuid();
 718     __ orl(rax, rax);
 719     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 720                                         // value of at least 1, we give up and
 721                                         // assume a 486
 722 
 723     //
 724     // Extended cpuid(0x80000000) for processor brand string detection
 725     //
 726     __ bind(ext_cpuid);
 727     __ movl(rax, CPUID_EXTENDED_FN);
 728     __ cpuid();
 729     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 730     __ jcc(Assembler::below, done);
 731 
 732     //
 733     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 734     //
 735     __ movl(rax, CPUID_EXTENDED_FN_2);
 736     __ cpuid();
 737     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 738     __ movl(Address(rsi, 0), rax);
 739     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 740     __ movl(Address(rsi, 0), rbx);
 741     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 742     __ movl(Address(rsi, 0), rcx);
 743     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 744     __ movl(Address(rsi,0), rdx);
 745 
 746     //
 747     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 748     //
 749     __ movl(rax, CPUID_EXTENDED_FN_3);
 750     __ cpuid();
 751     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 752     __ movl(Address(rsi, 0), rax);
 753     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 754     __ movl(Address(rsi, 0), rbx);
 755     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 756     __ movl(Address(rsi, 0), rcx);
 757     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 758     __ movl(Address(rsi,0), rdx);
 759 
 760     //
 761     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 762     //
 763     __ movl(rax, CPUID_EXTENDED_FN_4);
 764     __ cpuid();
 765     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 766     __ movl(Address(rsi, 0), rax);
 767     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 768     __ movl(Address(rsi, 0), rbx);
 769     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 770     __ movl(Address(rsi, 0), rcx);
 771     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 772     __ movl(Address(rsi,0), rdx);
 773 
 774     //
 775     // return
 776     //
 777     __ bind(done);
 778     __ popf();
 779     __ pop(rsi);
 780     __ pop(rbx);
 781     __ pop(rbp);
 782     __ ret(0);
 783 
 784 #   undef __
 785 
 786     return start;
 787   };
 788 };
 789 
 790 void VM_Version::get_processor_features() {
 791 
 792   _cpu = 4; // 486 by default
 793   _model = 0;
 794   _stepping = 0;
 795   _features = 0;
 796   _logical_processors_per_package = 1;
 797   // i486 internal cache is both I&D and has a 16-byte line size
 798   _L1_data_cache_line_size = 16;
 799 
 800   // Get raw processor info
 801 
 802   get_cpu_info_stub(&_cpuid_info);
 803 
 804   assert_is_initialized();
 805   _cpu = extended_cpu_family();
 806   _model = extended_cpu_model();
 807   _stepping = cpu_stepping();
 808 
 809   if (cpu_family() > 4) { // it supports CPUID
 810     _features = feature_flags();
 811     // Logical processors are only available on P4s and above,
 812     // and only if hyperthreading is available.
 813     _logical_processors_per_package = logical_processor_count();
 814     _L1_data_cache_line_size = L1_line_size();
 815   }
 816 
 817   _supports_cx8 = supports_cmpxchg8();
 818   // xchg and xadd instructions
 819   _supports_atomic_getset4 = true;
 820   _supports_atomic_getadd4 = true;
 821   LP64_ONLY(_supports_atomic_getset8 = true);
 822   LP64_ONLY(_supports_atomic_getadd8 = true);
 823 
 824 #ifdef _LP64
 825   // OS should support SSE for x64 and hardware should support at least SSE2.
 826   if (!VM_Version::supports_sse2()) {
 827     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 828   }
 829   // in 64 bit the use of SSE2 is the minimum
 830   if (UseSSE < 2) UseSSE = 2;
 831 #endif
 832 
 833 #ifdef AMD64
 834   // flush_icache_stub have to be generated first.
 835   // That is why Icache line size is hard coded in ICache class,
 836   // see icache_x86.hpp. It is also the reason why we can't use
 837   // clflush instruction in 32-bit VM since it could be running
 838   // on CPU which does not support it.
 839   //
 840   // The only thing we can do is to verify that flushed
 841   // ICache::line_size has correct value.
 842   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 843   // clflush_size is size in quadwords (8 bytes).
 844   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 845 #endif
 846 
 847 #ifdef _LP64
 848   // assigning this field effectively enables Unsafe.writebackMemory()
 849   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 850   // that is only implemented on x86_64 and only if the OS plays ball
 851   if (os::supports_map_sync()) {
 852     // publish data cache line flush size to generic field, otherwise
 853     // let if default to zero thereby disabling writeback
 854     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 855   }
 856 #endif
 857   // If the OS doesn't support SSE, we can't use this feature even if the HW does
 858   if (!os::supports_sse())
 859     _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
 860 
 861   if (UseSSE < 4) {
 862     _features &= ~CPU_SSE4_1;
 863     _features &= ~CPU_SSE4_2;
 864   }
 865 
 866   if (UseSSE < 3) {
 867     _features &= ~CPU_SSE3;
 868     _features &= ~CPU_SSSE3;
 869     _features &= ~CPU_SSE4A;
 870   }
 871 
 872   if (UseSSE < 2)
 873     _features &= ~CPU_SSE2;
 874 
 875   if (UseSSE < 1)
 876     _features &= ~CPU_SSE;
 877 
 878   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 879   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 880     UseAVX = 0;
 881   }
 882 
 883   // first try initial setting and detect what we can support
 884   int use_avx_limit = 0;
 885   if (UseAVX > 0) {
 886     if (UseAVX > 2 && supports_evex()) {
 887       use_avx_limit = 3;
 888     } else if (UseAVX > 1 && supports_avx2()) {
 889       use_avx_limit = 2;
 890     } else if (UseAVX > 0 && supports_avx()) {
 891       use_avx_limit = 1;
 892     } else {
 893       use_avx_limit = 0;
 894     }
 895   }
 896   if (FLAG_IS_DEFAULT(UseAVX)) {
 897     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 898     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 899       FLAG_SET_DEFAULT(UseAVX, 2);
 900     } else {
 901       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 902     }
 903   }
 904   if (UseAVX > use_avx_limit) {
 905     warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit);
 906     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 907   } else if (UseAVX < 0) {
 908     warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX);
 909     FLAG_SET_DEFAULT(UseAVX, 0);
 910   }
 911 
 912   if (UseAVX < 3) {
 913     _features &= ~CPU_AVX512F;
 914     _features &= ~CPU_AVX512DQ;
 915     _features &= ~CPU_AVX512CD;
 916     _features &= ~CPU_AVX512BW;
 917     _features &= ~CPU_AVX512VL;
 918     _features &= ~CPU_AVX512_VPOPCNTDQ;
 919     _features &= ~CPU_AVX512_VPCLMULQDQ;
 920     _features &= ~CPU_AVX512_VAES;
 921     _features &= ~CPU_AVX512_VNNI;
 922     _features &= ~CPU_AVX512_VBMI;
 923     _features &= ~CPU_AVX512_VBMI2;
 924     _features &= ~CPU_AVX512_BITALG;
 925   }
 926 
 927   if (UseAVX < 2)
 928     _features &= ~CPU_AVX2;
 929 
 930   if (UseAVX < 1) {
 931     _features &= ~CPU_AVX;
 932     _features &= ~CPU_VZEROUPPER;
 933   }
 934 
 935   if (logical_processors_per_package() == 1) {
 936     // HT processor could be installed on a system which doesn't support HT.
 937     _features &= ~CPU_HT;
 938   }
 939 
 940   if (is_intel()) { // Intel cpus specific settings
 941     if (is_knights_family()) {
 942       _features &= ~CPU_VZEROUPPER;
 943       _features &= ~CPU_AVX512BW;
 944       _features &= ~CPU_AVX512VL;
 945       _features &= ~CPU_AVX512DQ;
 946       _features &= ~CPU_AVX512_VNNI;
 947       _features &= ~CPU_AVX512_VAES;
 948       _features &= ~CPU_AVX512_VPOPCNTDQ;
 949       _features &= ~CPU_AVX512_VPCLMULQDQ;
 950       _features &= ~CPU_AVX512_VBMI;
 951       _features &= ~CPU_AVX512_VBMI2;
 952       _features &= ~CPU_CLWB;
 953       _features &= ~CPU_FLUSHOPT;
 954       _features &= ~CPU_GFNI;
 955       _features &= ~CPU_AVX512_BITALG;
 956     }
 957   }
 958 
 959   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
 960     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
 961   } else {
 962     _has_intel_jcc_erratum = IntelJccErratumMitigation;
 963   }
 964 
 965   char buf[512];
 966   int res = jio_snprintf(
 967               buf, sizeof(buf),
 968               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
 969               cores_per_cpu(), threads_per_core(),
 970               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
 971   assert(res > 0, "not enough temporary space allocated");
 972   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
 973 
 974   _features_string = os::strdup(buf);
 975 
 976   // UseSSE is set to the smaller of what hardware supports and what
 977   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 978   // older Pentiums which do not support it.
 979   int use_sse_limit = 0;
 980   if (UseSSE > 0) {
 981     if (UseSSE > 3 && supports_sse4_1()) {
 982       use_sse_limit = 4;
 983     } else if (UseSSE > 2 && supports_sse3()) {
 984       use_sse_limit = 3;
 985     } else if (UseSSE > 1 && supports_sse2()) {
 986       use_sse_limit = 2;
 987     } else if (UseSSE > 0 && supports_sse()) {
 988       use_sse_limit = 1;
 989     } else {
 990       use_sse_limit = 0;
 991     }
 992   }
 993   if (FLAG_IS_DEFAULT(UseSSE)) {
 994     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 995   } else if (UseSSE > use_sse_limit) {
 996     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit);
 997     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 998   } else if (UseSSE < 0) {
 999     warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE);
1000     FLAG_SET_DEFAULT(UseSSE, 0);
1001   }
1002 
1003   // Use AES instructions if available.
1004   if (supports_aes()) {
1005     if (FLAG_IS_DEFAULT(UseAES)) {
1006       FLAG_SET_DEFAULT(UseAES, true);
1007     }
1008     if (!UseAES) {
1009       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1010         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1011       }
1012       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1013     } else {
1014       if (UseSSE > 2) {
1015         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1016           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1017         }
1018       } else {
1019         // The AES intrinsic stubs require AES instruction support (of course)
1020         // but also require sse3 mode or higher for instructions it use.
1021         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1022           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1023         }
1024         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1025       }
1026 
1027       // --AES-CTR begins--
1028       if (!UseAESIntrinsics) {
1029         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1030           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1031           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1032         }
1033       } else {
1034         if (supports_sse4_1()) {
1035           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1036             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1037           }
1038         } else {
1039            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1040            // but also require sse4.1 mode or higher for instructions it use.
1041           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1042              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1043            }
1044            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1045         }
1046       }
1047       // --AES-CTR ends--
1048     }
1049   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1050     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1051       warning("AES instructions are not available on this CPU");
1052       FLAG_SET_DEFAULT(UseAES, false);
1053     }
1054     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1055       warning("AES intrinsics are not available on this CPU");
1056       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1057     }
1058     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1059       warning("AES-CTR intrinsics are not available on this CPU");
1060       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1061     }
1062   }
1063 
1064   // Use CLMUL instructions if available.
1065   if (supports_clmul()) {
1066     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1067       UseCLMUL = true;
1068     }
1069   } else if (UseCLMUL) {
1070     if (!FLAG_IS_DEFAULT(UseCLMUL))
1071       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1072     FLAG_SET_DEFAULT(UseCLMUL, false);
1073   }
1074 
1075   if (UseCLMUL && (UseSSE > 2)) {
1076     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1077       UseCRC32Intrinsics = true;
1078     }
1079   } else if (UseCRC32Intrinsics) {
1080     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1081       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1082     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1083   }
1084 
1085 #ifdef _LP64
1086   if (supports_avx2()) {
1087     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1088       UseAdler32Intrinsics = true;
1089     }
1090   } else if (UseAdler32Intrinsics) {
1091     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1092       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1093     }
1094     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1095   }
1096 #else
1097   if (UseAdler32Intrinsics) {
1098     warning("Adler32Intrinsics not available on this CPU.");
1099     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1100   }
1101 #endif
1102 
1103   if (supports_sse4_2() && supports_clmul()) {
1104     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1105       UseCRC32CIntrinsics = true;
1106     }
1107   } else if (UseCRC32CIntrinsics) {
1108     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1109       warning("CRC32C intrinsics are not available on this CPU");
1110     }
1111     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1112   }
1113 
1114   // GHASH/GCM intrinsics
1115   if (UseCLMUL && (UseSSE > 2)) {
1116     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1117       UseGHASHIntrinsics = true;
1118     }
1119   } else if (UseGHASHIntrinsics) {
1120     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1121       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1122     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1123   }
1124 
1125   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1126   if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) {
1127     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1128       UseBASE64Intrinsics = true;
1129     }
1130   } else if (UseBASE64Intrinsics) {
1131      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1132       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1133     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1134   }
1135 
1136   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1137     if (FLAG_IS_DEFAULT(UseFMA)) {
1138       UseFMA = true;
1139     }
1140   } else if (UseFMA) {
1141     warning("FMA instructions are not available on this CPU");
1142     FLAG_SET_DEFAULT(UseFMA, false);
1143   }
1144 
1145   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1146     UseMD5Intrinsics = true;
1147   }
1148 
1149   if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
1150     if (FLAG_IS_DEFAULT(UseSHA)) {
1151       UseSHA = true;
1152     }
1153   } else if (UseSHA) {
1154     warning("SHA instructions are not available on this CPU");
1155     FLAG_SET_DEFAULT(UseSHA, false);
1156   }
1157 
1158   if (supports_sha() && supports_sse4_1() && UseSHA) {
1159     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1160       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1161     }
1162   } else if (UseSHA1Intrinsics) {
1163     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1164     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1165   }
1166 
1167   if (supports_sse4_1() && UseSHA) {
1168     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1169       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1170     }
1171   } else if (UseSHA256Intrinsics) {
1172     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1173     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1174   }
1175 
1176 #ifdef _LP64
1177   // These are only supported on 64-bit
1178   if (UseSHA && supports_avx2() && supports_bmi2()) {
1179     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1180       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1181     }
1182   } else
1183 #endif
1184   if (UseSHA512Intrinsics) {
1185     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1186     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1187   }
1188 
1189   if (UseSHA3Intrinsics) {
1190     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1191     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1192   }
1193 
1194   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1195     FLAG_SET_DEFAULT(UseSHA, false);
1196   }
1197 
1198   if (!supports_rtm() && UseRTMLocking) {
1199     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1200   }
1201 
1202 #if INCLUDE_RTM_OPT
1203   if (UseRTMLocking) {
1204     if (!CompilerConfig::is_c2_enabled()) {
1205       // Only C2 does RTM locking optimization.
1206       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1207     }
1208     if (is_intel_family_core()) {
1209       if ((_model == CPU_MODEL_HASWELL_E3) ||
1210           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1211           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1212         // currently a collision between SKL and HSW_E3
1213         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1214           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1215                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1216         } else {
1217           warning("UseRTMLocking is only available as experimental option on this platform.");
1218         }
1219       }
1220     }
1221     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1222       // RTM locking should be used only for applications with
1223       // high lock contention. For now we do not use it by default.
1224       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1225     }
1226   } else { // !UseRTMLocking
1227     if (UseRTMForStackLocks) {
1228       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1229         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1230       }
1231       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1232     }
1233     if (UseRTMDeopt) {
1234       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1235     }
1236     if (PrintPreciseRTMLockingStatistics) {
1237       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1238     }
1239   }
1240 #else
1241   if (UseRTMLocking) {
1242     // Only C2 does RTM locking optimization.
1243     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1244   }
1245 #endif
1246 
1247 #ifdef COMPILER2
1248   if (UseFPUForSpilling) {
1249     if (UseSSE < 2) {
1250       // Only supported with SSE2+
1251       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1252     }
1253   }
1254 #endif
1255 
1256 #if COMPILER2_OR_JVMCI
1257   int max_vector_size = 0;
1258   if (UseSSE < 2) {
1259     // Vectors (in XMM) are only supported with SSE2+
1260     // SSE is always 2 on x64.
1261     max_vector_size = 0;
1262   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1263     // 16 byte vectors (in XMM) are supported with SSE2+
1264     max_vector_size = 16;
1265   } else if (UseAVX == 1 || UseAVX == 2) {
1266     // 32 bytes vectors (in YMM) are only supported with AVX+
1267     max_vector_size = 32;
1268   } else if (UseAVX > 2) {
1269     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1270     max_vector_size = 64;
1271   }
1272 
1273 #ifdef _LP64
1274   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1275 #else
1276   int min_vector_size = 0;
1277 #endif
1278 
1279   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1280     if (MaxVectorSize < min_vector_size) {
1281       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1282       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1283     }
1284     if (MaxVectorSize > max_vector_size) {
1285       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1286       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1287     }
1288     if (!is_power_of_2(MaxVectorSize)) {
1289       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1290       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1291     }
1292   } else {
1293     // If default, use highest supported configuration
1294     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1295   }
1296 
1297 #if defined(COMPILER2)
1298   if (FLAG_IS_DEFAULT(SuperWordMaxVectorSize)) {
1299     if (FLAG_IS_DEFAULT(UseAVX) && UseAVX > 2 &&
1300         is_intel_skylake() && _stepping >= 5) {
1301       // Limit auto vectorization to 256 bit (32 byte) by default on Cascade Lake
1302       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MIN2(MaxVectorSize, (intx)32));
1303     } else {
1304       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
1305     }
1306   } else {
1307     if (SuperWordMaxVectorSize > MaxVectorSize) {
1308       warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %i", (int) MaxVectorSize);
1309       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
1310     }
1311     if (!is_power_of_2(SuperWordMaxVectorSize)) {
1312       warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %i", (int) MaxVectorSize);
1313       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
1314     }
1315   }
1316 #endif
1317 
1318 #if defined(COMPILER2) && defined(ASSERT)
1319   if (MaxVectorSize > 0) {
1320     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1321       tty->print_cr("State of YMM registers after signal handle:");
1322       int nreg = 2 LP64_ONLY(+2);
1323       const char* ymm_name[4] = {"0", "7", "8", "15"};
1324       for (int i = 0; i < nreg; i++) {
1325         tty->print("YMM%s:", ymm_name[i]);
1326         for (int j = 7; j >=0; j--) {
1327           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1328         }
1329         tty->cr();
1330       }
1331     }
1332   }
1333 #endif // COMPILER2 && ASSERT
1334 
1335 #ifdef _LP64
1336   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1337     UseMultiplyToLenIntrinsic = true;
1338   }
1339   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1340     UseSquareToLenIntrinsic = true;
1341   }
1342   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1343     UseMulAddIntrinsic = true;
1344   }
1345   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1346     UseMontgomeryMultiplyIntrinsic = true;
1347   }
1348   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1349     UseMontgomerySquareIntrinsic = true;
1350   }
1351 #else
1352   if (UseMultiplyToLenIntrinsic) {
1353     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1354       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1355     }
1356     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1357   }
1358   if (UseMontgomeryMultiplyIntrinsic) {
1359     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1360       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1361     }
1362     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1363   }
1364   if (UseMontgomerySquareIntrinsic) {
1365     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1366       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1367     }
1368     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1369   }
1370   if (UseSquareToLenIntrinsic) {
1371     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1372       warning("squareToLen intrinsic is not available in 32-bit VM");
1373     }
1374     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1375   }
1376   if (UseMulAddIntrinsic) {
1377     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1378       warning("mulAdd intrinsic is not available in 32-bit VM");
1379     }
1380     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1381   }
1382 #endif // _LP64
1383 #endif // COMPILER2_OR_JVMCI
1384 
1385   // On new cpus instructions which update whole XMM register should be used
1386   // to prevent partial register stall due to dependencies on high half.
1387   //
1388   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1389   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1390   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1391   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1392 
1393 
1394   if (is_zx()) { // ZX cpus specific settings
1395     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1396       UseStoreImmI16 = false; // don't use it on ZX cpus
1397     }
1398     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1399       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1400         // Use it on all ZX cpus
1401         UseAddressNop = true;
1402       }
1403     }
1404     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1405       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1406     }
1407     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1408       if (supports_sse3()) {
1409         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1410       } else {
1411         UseXmmRegToRegMoveAll = false;
1412       }
1413     }
1414     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1415 #ifdef COMPILER2
1416       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1417         // For new ZX cpus do the next optimization:
1418         // don't align the beginning of a loop if there are enough instructions
1419         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1420         // in current fetch line (OptoLoopAlignment) or the padding
1421         // is big (> MaxLoopPad).
1422         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1423         // generated NOP instructions. 11 is the largest size of one
1424         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1425         MaxLoopPad = 11;
1426       }
1427 #endif // COMPILER2
1428       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1429         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1430       }
1431       if (supports_sse4_2()) { // new ZX cpus
1432         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1433           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1434         }
1435       }
1436       if (supports_sse4_2()) {
1437         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1438           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1439         }
1440       } else {
1441         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1442           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1443         }
1444         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1445       }
1446     }
1447 
1448     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1449       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1450     }
1451   }
1452 
1453   if (is_amd_family()) { // AMD cpus specific settings
1454     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1455       // Use it on new AMD cpus starting from Opteron.
1456       UseAddressNop = true;
1457     }
1458     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1459       // Use it on new AMD cpus starting from Opteron.
1460       UseNewLongLShift = true;
1461     }
1462     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1463       if (supports_sse4a()) {
1464         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1465       } else {
1466         UseXmmLoadAndClearUpper = false;
1467       }
1468     }
1469     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1470       if (supports_sse4a()) {
1471         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1472       } else {
1473         UseXmmRegToRegMoveAll = false;
1474       }
1475     }
1476     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1477       if (supports_sse4a()) {
1478         UseXmmI2F = true;
1479       } else {
1480         UseXmmI2F = false;
1481       }
1482     }
1483     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1484       if (supports_sse4a()) {
1485         UseXmmI2D = true;
1486       } else {
1487         UseXmmI2D = false;
1488       }
1489     }
1490     if (supports_sse4_2()) {
1491       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1492         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1493       }
1494     } else {
1495       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1496         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1497       }
1498       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1499     }
1500 
1501     // some defaults for AMD family 15h
1502     if (cpu_family() == 0x15) {
1503       // On family 15h processors default is no sw prefetch
1504       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1505         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1506       }
1507       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1508       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1509         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1510       }
1511       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1512       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1513         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1514       }
1515       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1516         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1517       }
1518     }
1519 
1520 #ifdef COMPILER2
1521     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1522       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1523       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1524     }
1525 #endif // COMPILER2
1526 
1527     // Some defaults for AMD family >= 17h && Hygon family 18h
1528     if (cpu_family() >= 0x17) {
1529       // On family >=17h processors use XMM and UnalignedLoadStores
1530       // for Array Copy
1531       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1532         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1533       }
1534       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1535         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1536       }
1537 #ifdef COMPILER2
1538       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1539         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1540       }
1541 #endif
1542     }
1543   }
1544 
1545   if (is_intel()) { // Intel cpus specific settings
1546     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1547       UseStoreImmI16 = false; // don't use it on Intel cpus
1548     }
1549     if (cpu_family() == 6 || cpu_family() == 15) {
1550       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1551         // Use it on all Intel cpus starting from PentiumPro
1552         UseAddressNop = true;
1553       }
1554     }
1555     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1556       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1557     }
1558     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1559       if (supports_sse3()) {
1560         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1561       } else {
1562         UseXmmRegToRegMoveAll = false;
1563       }
1564     }
1565     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1566 #ifdef COMPILER2
1567       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1568         // For new Intel cpus do the next optimization:
1569         // don't align the beginning of a loop if there are enough instructions
1570         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1571         // in current fetch line (OptoLoopAlignment) or the padding
1572         // is big (> MaxLoopPad).
1573         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1574         // generated NOP instructions. 11 is the largest size of one
1575         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1576         MaxLoopPad = 11;
1577       }
1578 #endif // COMPILER2
1579 
1580       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1581         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1582       }
1583       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1584         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1585           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1586         }
1587       }
1588       if (supports_sse4_2()) {
1589         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1590           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1591         }
1592       } else {
1593         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1594           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1595         }
1596         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1597       }
1598     }
1599     if (is_atom_family() || is_knights_family()) {
1600 #ifdef COMPILER2
1601       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1602         OptoScheduling = true;
1603       }
1604 #endif
1605       if (supports_sse4_2()) { // Silvermont
1606         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1607           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1608         }
1609       }
1610       if (FLAG_IS_DEFAULT(UseIncDec)) {
1611         FLAG_SET_DEFAULT(UseIncDec, false);
1612       }
1613     }
1614     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1615       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1616     }
1617 #ifdef COMPILER2
1618     if (UseAVX > 2) {
1619       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1620           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1621            ArrayOperationPartialInlineSize != 0 &&
1622            ArrayOperationPartialInlineSize != 16 &&
1623            ArrayOperationPartialInlineSize != 32 &&
1624            ArrayOperationPartialInlineSize != 64)) {
1625         int inline_size = 0;
1626         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1627           inline_size = 64;
1628         } else if (MaxVectorSize >= 32) {
1629           inline_size = 32;
1630         } else if (MaxVectorSize >= 16) {
1631           inline_size = 16;
1632         }
1633         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1634           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1635         }
1636         ArrayOperationPartialInlineSize = inline_size;
1637       }
1638 
1639       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1640         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1641         if (ArrayOperationPartialInlineSize) {
1642           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1643         } else {
1644           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1645         }
1646       }
1647     }
1648 #endif
1649   }
1650 
1651 #ifdef COMPILER2
1652   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1653     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1654       OptimizeFill = false;
1655     }
1656   }
1657 #endif
1658 
1659 #ifdef _LP64
1660   if (UseSSE42Intrinsics) {
1661     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1662       UseVectorizedMismatchIntrinsic = true;
1663     }
1664   } else if (UseVectorizedMismatchIntrinsic) {
1665     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1666       warning("vectorizedMismatch intrinsics are not available on this CPU");
1667     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1668   }
1669 #else
1670   if (UseVectorizedMismatchIntrinsic) {
1671     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1672       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1673     }
1674     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1675   }
1676 #endif // _LP64
1677 
1678   // Use count leading zeros count instruction if available.
1679   if (supports_lzcnt()) {
1680     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1681       UseCountLeadingZerosInstruction = true;
1682     }
1683    } else if (UseCountLeadingZerosInstruction) {
1684     warning("lzcnt instruction is not available on this CPU");
1685     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1686   }
1687 
1688   // Use count trailing zeros instruction if available
1689   if (supports_bmi1()) {
1690     // tzcnt does not require VEX prefix
1691     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1692       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1693         // Don't use tzcnt if BMI1 is switched off on command line.
1694         UseCountTrailingZerosInstruction = false;
1695       } else {
1696         UseCountTrailingZerosInstruction = true;
1697       }
1698     }
1699   } else if (UseCountTrailingZerosInstruction) {
1700     warning("tzcnt instruction is not available on this CPU");
1701     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1702   }
1703 
1704   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1705   // VEX prefix is generated only when AVX > 0.
1706   if (supports_bmi1() && supports_avx()) {
1707     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1708       UseBMI1Instructions = true;
1709     }
1710   } else if (UseBMI1Instructions) {
1711     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1712     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1713   }
1714 
1715   if (supports_bmi2() && supports_avx()) {
1716     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1717       UseBMI2Instructions = true;
1718     }
1719   } else if (UseBMI2Instructions) {
1720     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1721     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1722   }
1723 
1724   // Use population count instruction if available.
1725   if (supports_popcnt()) {
1726     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1727       UsePopCountInstruction = true;
1728     }
1729   } else if (UsePopCountInstruction) {
1730     warning("POPCNT instruction is not available on this CPU");
1731     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1732   }
1733 
1734   // Use fast-string operations if available.
1735   if (supports_erms()) {
1736     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1737       UseFastStosb = true;
1738     }
1739   } else if (UseFastStosb) {
1740     warning("fast-string operations are not available on this CPU");
1741     FLAG_SET_DEFAULT(UseFastStosb, false);
1742   }
1743 
1744   // For AMD Processors use XMM/YMM MOVDQU instructions
1745   // for Object Initialization as default
1746   if (is_amd() && cpu_family() >= 0x19) {
1747     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1748       UseFastStosb = false;
1749     }
1750   }
1751 
1752 #ifdef COMPILER2
1753   if (is_intel() && MaxVectorSize > 16) {
1754     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1755       UseFastStosb = false;
1756     }
1757   }
1758 #endif
1759 
1760   // Use XMM/YMM MOVDQU instruction for Object Initialization
1761   if (UseSSE >= 2 && UseUnalignedLoadStores) {
1762     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1763       UseXMMForObjInit = true;
1764     }
1765   } else if (UseXMMForObjInit) {
1766     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1767     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1768   }
1769 
1770 #ifdef COMPILER2
1771   if (FLAG_IS_DEFAULT(AlignVector)) {
1772     // Modern processors allow misaligned memory operations for vectors.
1773     AlignVector = !UseUnalignedLoadStores;
1774   }
1775 #endif // COMPILER2
1776 
1777   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1778     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1779       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1780     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1781       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1782     }
1783   }
1784 
1785   // Allocation prefetch settings
1786   intx cache_line_size = prefetch_data_size();
1787   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1788       (cache_line_size > AllocatePrefetchStepSize)) {
1789     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1790   }
1791 
1792   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1793     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1794     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1795       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1796     }
1797     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1798   }
1799 
1800   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1801     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1802     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1803   }
1804 
1805   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1806     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1807         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1808       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1809     }
1810 #ifdef COMPILER2
1811     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1812       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1813     }
1814 #endif
1815   }
1816 
1817   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1818 #ifdef COMPILER2
1819     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1820       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1821     }
1822 #endif
1823   }
1824 
1825 #ifdef _LP64
1826   // Prefetch settings
1827 
1828   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1829   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1830   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1831   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1832 
1833   // gc copy/scan is disabled if prefetchw isn't supported, because
1834   // Prefetch::write emits an inlined prefetchw on Linux.
1835   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1836   // The used prefetcht0 instruction works for both amd64 and em64t.
1837 
1838   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1839     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1840   }
1841   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1842     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1843   }
1844 #endif
1845 
1846   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1847      (cache_line_size > ContendedPaddingWidth))
1848      ContendedPaddingWidth = cache_line_size;
1849 
1850   // This machine allows unaligned memory accesses
1851   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1852     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1853   }
1854 
1855 #ifndef PRODUCT
1856   if (log_is_enabled(Info, os, cpu)) {
1857     LogStream ls(Log(os, cpu)::info());
1858     outputStream* log = &ls;
1859     log->print_cr("Logical CPUs per core: %u",
1860                   logical_processors_per_package());
1861     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1862     log->print("UseSSE=%d", (int) UseSSE);
1863     if (UseAVX > 0) {
1864       log->print("  UseAVX=%d", (int) UseAVX);
1865     }
1866     if (UseAES) {
1867       log->print("  UseAES=1");
1868     }
1869 #ifdef COMPILER2
1870     if (MaxVectorSize > 0) {
1871       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1872     }
1873 #endif
1874     log->cr();
1875     log->print("Allocation");
1876     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1877       log->print_cr(": no prefetching");
1878     } else {
1879       log->print(" prefetching: ");
1880       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1881         log->print("PREFETCHW");
1882       } else if (UseSSE >= 1) {
1883         if (AllocatePrefetchInstr == 0) {
1884           log->print("PREFETCHNTA");
1885         } else if (AllocatePrefetchInstr == 1) {
1886           log->print("PREFETCHT0");
1887         } else if (AllocatePrefetchInstr == 2) {
1888           log->print("PREFETCHT2");
1889         } else if (AllocatePrefetchInstr == 3) {
1890           log->print("PREFETCHW");
1891         }
1892       }
1893       if (AllocatePrefetchLines > 1) {
1894         log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1895       } else {
1896         log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1897       }
1898     }
1899 
1900     if (PrefetchCopyIntervalInBytes > 0) {
1901       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1902     }
1903     if (PrefetchScanIntervalInBytes > 0) {
1904       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1905     }
1906     if (ContendedPaddingWidth > 0) {
1907       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1908     }
1909   }
1910 #endif // !PRODUCT
1911   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1912       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1913   }
1914   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1915       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1916   }
1917 }
1918 
1919 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1920   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1921   if (vrt == XenHVM) {
1922     st->print_cr("Xen hardware-assisted virtualization detected");
1923   } else if (vrt == KVM) {
1924     st->print_cr("KVM virtualization detected");
1925   } else if (vrt == VMWare) {
1926     st->print_cr("VMWare virtualization detected");
1927     VirtualizationSupport::print_virtualization_info(st);
1928   } else if (vrt == HyperV) {
1929     st->print_cr("Hyper-V virtualization detected");
1930   } else if (vrt == HyperVRole) {
1931     st->print_cr("Hyper-V role detected");
1932   }
1933 }
1934 
1935 bool VM_Version::compute_has_intel_jcc_erratum() {
1936   if (!is_intel_family_core()) {
1937     // Only Intel CPUs are affected.
1938     return false;
1939   }
1940   // The following table of affected CPUs is based on the following document released by Intel:
1941   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1942   switch (_model) {
1943   case 0x8E:
1944     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1945     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1946     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1947     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1948     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1949     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1950     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1951     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1952     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1953     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1954   case 0x4E:
1955     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1956     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1957     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1958     return _stepping == 0x3;
1959   case 0x55:
1960     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1961     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1962     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1963     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1964     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1965     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1966     return _stepping == 0x4 || _stepping == 0x7;
1967   case 0x5E:
1968     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1969     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1970     return _stepping == 0x3;
1971   case 0x9E:
1972     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1973     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1974     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1975     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1976     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1977     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1978     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1979     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1980     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1981     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1982     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1983     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1984     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1985     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1986     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1987   case 0xA5:
1988     // Not in Intel documentation.
1989     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1990     return true;
1991   case 0xA6:
1992     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1993     return _stepping == 0x0;
1994   case 0xAE:
1995     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1996     return _stepping == 0xA;
1997   default:
1998     // If we are running on another intel machine not recognized in the table, we are okay.
1999     return false;
2000   }
2001 }
2002 
2003 // On Xen, the cpuid instruction returns
2004 //  eax / registers[0]: Version of Xen
2005 //  ebx / registers[1]: chars 'XenV'
2006 //  ecx / registers[2]: chars 'MMXe'
2007 //  edx / registers[3]: chars 'nVMM'
2008 //
2009 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2010 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2011 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2012 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2013 //
2014 // more information :
2015 // https://kb.vmware.com/s/article/1009458
2016 //
2017 void VM_Version::check_virtualizations() {
2018   uint32_t registers[4] = {0};
2019   char signature[13] = {0};
2020 
2021   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2022   // from 0x40000000 until 0x40010000.
2023   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2024   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2025     detect_virt_stub(leaf, registers);
2026     memcpy(signature, &registers[1], 12);
2027 
2028     if (strncmp("VMwareVMware", signature, 12) == 0) {
2029       Abstract_VM_Version::_detected_virtualization = VMWare;
2030       // check for extended metrics from guestlib
2031       VirtualizationSupport::initialize();
2032     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2033       Abstract_VM_Version::_detected_virtualization = HyperV;
2034 #ifdef _WINDOWS
2035       // CPUID leaf 0x40000007 is available to the root partition only.
2036       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2037       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2038       detect_virt_stub(0x40000007, registers);
2039       if ((registers[0] != 0x0) ||
2040           (registers[1] != 0x0) ||
2041           (registers[2] != 0x0) ||
2042           (registers[3] != 0x0)) {
2043         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2044       }
2045 #endif
2046     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2047       Abstract_VM_Version::_detected_virtualization = KVM;
2048     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2049       Abstract_VM_Version::_detected_virtualization = XenHVM;
2050     }
2051   }
2052 }
2053 
2054 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2055 // for implementing the array copy and clear operations.
2056 // The Intel platforms that supports the serialize instruction
2057 // has improved implementation of 64-byte load/stores and so the default
2058 // threshold is set to 0 for these platforms.
2059 int VM_Version::avx3_threshold() {
2060   return (is_intel_family_core() &&
2061           supports_serialize() &&
2062           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2063 }
2064 
2065 static bool _vm_version_initialized = false;
2066 
2067 void VM_Version::initialize() {
2068   ResourceMark rm;
2069   // Making this stub must be FIRST use of assembler
2070   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2071   if (stub_blob == NULL) {
2072     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2073   }
2074   CodeBuffer c(stub_blob);
2075   VM_Version_StubGenerator g(&c);
2076 
2077   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2078                                      g.generate_get_cpu_info());
2079   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2080                                      g.generate_detect_virt());
2081 
2082   get_processor_features();
2083 
2084   LP64_ONLY(Assembler::precompute_instructions();)
2085 
2086   if (VM_Version::supports_hv()) { // Supports hypervisor
2087     check_virtualizations();
2088   }
2089   _vm_version_initialized = true;
2090 }
2091 
2092 typedef enum {
2093    CPU_FAMILY_8086_8088  = 0,
2094    CPU_FAMILY_INTEL_286  = 2,
2095    CPU_FAMILY_INTEL_386  = 3,
2096    CPU_FAMILY_INTEL_486  = 4,
2097    CPU_FAMILY_PENTIUM    = 5,
2098    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2099    CPU_FAMILY_PENTIUM_4  = 0xF
2100 } FamilyFlag;
2101 
2102 typedef enum {
2103   RDTSCP_FLAG  = 0x08000000, // bit 27
2104   INTEL64_FLAG = 0x20000000  // bit 29
2105 } _featureExtendedEdxFlag;
2106 
2107 typedef enum {
2108    FPU_FLAG     = 0x00000001,
2109    VME_FLAG     = 0x00000002,
2110    DE_FLAG      = 0x00000004,
2111    PSE_FLAG     = 0x00000008,
2112    TSC_FLAG     = 0x00000010,
2113    MSR_FLAG     = 0x00000020,
2114    PAE_FLAG     = 0x00000040,
2115    MCE_FLAG     = 0x00000080,
2116    CX8_FLAG     = 0x00000100,
2117    APIC_FLAG    = 0x00000200,
2118    SEP_FLAG     = 0x00000800,
2119    MTRR_FLAG    = 0x00001000,
2120    PGE_FLAG     = 0x00002000,
2121    MCA_FLAG     = 0x00004000,
2122    CMOV_FLAG    = 0x00008000,
2123    PAT_FLAG     = 0x00010000,
2124    PSE36_FLAG   = 0x00020000,
2125    PSNUM_FLAG   = 0x00040000,
2126    CLFLUSH_FLAG = 0x00080000,
2127    DTS_FLAG     = 0x00200000,
2128    ACPI_FLAG    = 0x00400000,
2129    MMX_FLAG     = 0x00800000,
2130    FXSR_FLAG    = 0x01000000,
2131    SSE_FLAG     = 0x02000000,
2132    SSE2_FLAG    = 0x04000000,
2133    SS_FLAG      = 0x08000000,
2134    HTT_FLAG     = 0x10000000,
2135    TM_FLAG      = 0x20000000
2136 } FeatureEdxFlag;
2137 
2138 static BufferBlob* cpuid_brand_string_stub_blob;
2139 static const int   cpuid_brand_string_stub_size = 550;
2140 
2141 extern "C" {
2142   typedef void (*getCPUIDBrandString_stub_t)(void*);
2143 }
2144 
2145 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL;
2146 
2147 // VM_Version statics
2148 enum {
2149   ExtendedFamilyIdLength_INTEL = 16,
2150   ExtendedFamilyIdLength_AMD   = 24
2151 };
2152 
2153 const size_t VENDOR_LENGTH = 13;
2154 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2155 static char* _cpu_brand_string = NULL;
2156 static int64_t _max_qualified_cpu_frequency = 0;
2157 
2158 static int _no_of_threads = 0;
2159 static int _no_of_cores = 0;
2160 
2161 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2162   "8086/8088",
2163   "",
2164   "286",
2165   "386",
2166   "486",
2167   "Pentium",
2168   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2169   "",
2170   "",
2171   "",
2172   "",
2173   "",
2174   "",
2175   "",
2176   "",
2177   "Pentium 4"
2178 };
2179 
2180 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2181   "",
2182   "",
2183   "",
2184   "",
2185   "5x86",
2186   "K5/K6",
2187   "Athlon/AthlonXP",
2188   "",
2189   "",
2190   "",
2191   "",
2192   "",
2193   "",
2194   "",
2195   "",
2196   "Opteron/Athlon64",
2197   "Opteron QC/Phenom",  // Barcelona et.al.
2198   "",
2199   "",
2200   "",
2201   "",
2202   "",
2203   "",
2204   "Zen"
2205 };
2206 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2207 // September 2013, Vol 3C Table 35-1
2208 const char* const _model_id_pentium_pro[] = {
2209   "",
2210   "Pentium Pro",
2211   "",
2212   "Pentium II model 3",
2213   "",
2214   "Pentium II model 5/Xeon/Celeron",
2215   "Celeron",
2216   "Pentium III/Pentium III Xeon",
2217   "Pentium III/Pentium III Xeon",
2218   "Pentium M model 9",    // Yonah
2219   "Pentium III, model A",
2220   "Pentium III, model B",
2221   "",
2222   "Pentium M model D",    // Dothan
2223   "",
2224   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2225   "",
2226   "",
2227   "",
2228   "",
2229   "",
2230   "",
2231   "Celeron",              // 0x16 Celeron 65nm
2232   "Core 2",               // 0x17 Penryn / Harpertown
2233   "",
2234   "",
2235   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2236   "Atom",                 // 0x1B Z5xx series Silverthorn
2237   "",
2238   "Core 2",               // 0x1D Dunnington (6-core)
2239   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2240   "",
2241   "",
2242   "",
2243   "",
2244   "",
2245   "",
2246   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2247   "",
2248   "",
2249   "",                     // 0x28
2250   "",
2251   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2252   "",
2253   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2254   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2255   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2256   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2257   "",
2258   "",
2259   "",
2260   "",
2261   "",
2262   "",
2263   "",
2264   "",
2265   "",
2266   "",
2267   "Ivy Bridge",           // 0x3a
2268   "",
2269   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2270   "",                     // 0x3d "Next Generation Intel Core Processor"
2271   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2272   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2273   "",
2274   "",
2275   "",
2276   "",
2277   "",
2278   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2279   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2280   NULL
2281 };
2282 
2283 /* Brand ID is for back compatibility
2284  * Newer CPUs uses the extended brand string */
2285 const char* const _brand_id[] = {
2286   "",
2287   "Celeron processor",
2288   "Pentium III processor",
2289   "Intel Pentium III Xeon processor",
2290   "",
2291   "",
2292   "",
2293   "",
2294   "Intel Pentium 4 processor",
2295   NULL
2296 };
2297 
2298 
2299 const char* const _feature_edx_id[] = {
2300   "On-Chip FPU",
2301   "Virtual Mode Extensions",
2302   "Debugging Extensions",
2303   "Page Size Extensions",
2304   "Time Stamp Counter",
2305   "Model Specific Registers",
2306   "Physical Address Extension",
2307   "Machine Check Exceptions",
2308   "CMPXCHG8B Instruction",
2309   "On-Chip APIC",
2310   "",
2311   "Fast System Call",
2312   "Memory Type Range Registers",
2313   "Page Global Enable",
2314   "Machine Check Architecture",
2315   "Conditional Mov Instruction",
2316   "Page Attribute Table",
2317   "36-bit Page Size Extension",
2318   "Processor Serial Number",
2319   "CLFLUSH Instruction",
2320   "",
2321   "Debug Trace Store feature",
2322   "ACPI registers in MSR space",
2323   "Intel Architecture MMX Technology",
2324   "Fast Float Point Save and Restore",
2325   "Streaming SIMD extensions",
2326   "Streaming SIMD extensions 2",
2327   "Self-Snoop",
2328   "Hyper Threading",
2329   "Thermal Monitor",
2330   "",
2331   "Pending Break Enable"
2332 };
2333 
2334 const char* const _feature_extended_edx_id[] = {
2335   "",
2336   "",
2337   "",
2338   "",
2339   "",
2340   "",
2341   "",
2342   "",
2343   "",
2344   "",
2345   "",
2346   "SYSCALL/SYSRET",
2347   "",
2348   "",
2349   "",
2350   "",
2351   "",
2352   "",
2353   "",
2354   "",
2355   "Execute Disable Bit",
2356   "",
2357   "",
2358   "",
2359   "",
2360   "",
2361   "",
2362   "RDTSCP",
2363   "",
2364   "Intel 64 Architecture",
2365   "",
2366   ""
2367 };
2368 
2369 const char* const _feature_ecx_id[] = {
2370   "Streaming SIMD Extensions 3",
2371   "PCLMULQDQ",
2372   "64-bit DS Area",
2373   "MONITOR/MWAIT instructions",
2374   "CPL Qualified Debug Store",
2375   "Virtual Machine Extensions",
2376   "Safer Mode Extensions",
2377   "Enhanced Intel SpeedStep technology",
2378   "Thermal Monitor 2",
2379   "Supplemental Streaming SIMD Extensions 3",
2380   "L1 Context ID",
2381   "",
2382   "Fused Multiply-Add",
2383   "CMPXCHG16B",
2384   "xTPR Update Control",
2385   "Perfmon and Debug Capability",
2386   "",
2387   "Process-context identifiers",
2388   "Direct Cache Access",
2389   "Streaming SIMD extensions 4.1",
2390   "Streaming SIMD extensions 4.2",
2391   "x2APIC",
2392   "MOVBE",
2393   "Popcount instruction",
2394   "TSC-Deadline",
2395   "AESNI",
2396   "XSAVE",
2397   "OSXSAVE",
2398   "AVX",
2399   "F16C",
2400   "RDRAND",
2401   ""
2402 };
2403 
2404 const char* const _feature_extended_ecx_id[] = {
2405   "LAHF/SAHF instruction support",
2406   "Core multi-processor legacy mode",
2407   "",
2408   "",
2409   "",
2410   "Advanced Bit Manipulations: LZCNT",
2411   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2412   "Misaligned SSE mode",
2413   "",
2414   "",
2415   "",
2416   "",
2417   "",
2418   "",
2419   "",
2420   "",
2421   "",
2422   "",
2423   "",
2424   "",
2425   "",
2426   "",
2427   "",
2428   "",
2429   "",
2430   "",
2431   "",
2432   "",
2433   "",
2434   "",
2435   "",
2436   ""
2437 };
2438 
2439 void VM_Version::initialize_tsc(void) {
2440   ResourceMark rm;
2441 
2442   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2443   if (cpuid_brand_string_stub_blob == NULL) {
2444     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2445   }
2446   CodeBuffer c(cpuid_brand_string_stub_blob);
2447   VM_Version_StubGenerator g(&c);
2448   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2449                                    g.generate_getCPUIDBrandString());
2450 }
2451 
2452 const char* VM_Version::cpu_model_description(void) {
2453   uint32_t cpu_family = extended_cpu_family();
2454   uint32_t cpu_model = extended_cpu_model();
2455   const char* model = NULL;
2456 
2457   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2458     for (uint32_t i = 0; i <= cpu_model; i++) {
2459       model = _model_id_pentium_pro[i];
2460       if (model == NULL) {
2461         break;
2462       }
2463     }
2464   }
2465   return model;
2466 }
2467 
2468 const char* VM_Version::cpu_brand_string(void) {
2469   if (_cpu_brand_string == NULL) {
2470     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2471     if (NULL == _cpu_brand_string) {
2472       return NULL;
2473     }
2474     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2475     if (ret_val != OS_OK) {
2476       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2477       _cpu_brand_string = NULL;
2478     }
2479   }
2480   return _cpu_brand_string;
2481 }
2482 
2483 const char* VM_Version::cpu_brand(void) {
2484   const char*  brand  = NULL;
2485 
2486   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2487     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2488     brand = _brand_id[0];
2489     for (int i = 0; brand != NULL && i <= brand_num; i += 1) {
2490       brand = _brand_id[i];
2491     }
2492   }
2493   return brand;
2494 }
2495 
2496 bool VM_Version::cpu_is_em64t(void) {
2497   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2498 }
2499 
2500 bool VM_Version::is_netburst(void) {
2501   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2502 }
2503 
2504 bool VM_Version::supports_tscinv_ext(void) {
2505   if (!supports_tscinv_bit()) {
2506     return false;
2507   }
2508 
2509   if (is_intel()) {
2510     return true;
2511   }
2512 
2513   if (is_amd()) {
2514     return !is_amd_Barcelona();
2515   }
2516 
2517   if (is_hygon()) {
2518     return true;
2519   }
2520 
2521   return false;
2522 }
2523 
2524 void VM_Version::resolve_cpu_information_details(void) {
2525 
2526   // in future we want to base this information on proper cpu
2527   // and cache topology enumeration such as:
2528   // Intel 64 Architecture Processor Topology Enumeration
2529   // which supports system cpu and cache topology enumeration
2530   // either using 2xAPICIDs or initial APICIDs
2531 
2532   // currently only rough cpu information estimates
2533   // which will not necessarily reflect the exact configuration of the system
2534 
2535   // this is the number of logical hardware threads
2536   // visible to the operating system
2537   _no_of_threads = os::processor_count();
2538 
2539   // find out number of threads per cpu package
2540   int threads_per_package = threads_per_core() * cores_per_cpu();
2541 
2542   // use amount of threads visible to the process in order to guess number of sockets
2543   _no_of_sockets = _no_of_threads / threads_per_package;
2544 
2545   // process might only see a subset of the total number of threads
2546   // from a single processor package. Virtualization/resource management for example.
2547   // If so then just write a hard 1 as num of pkgs.
2548   if (0 == _no_of_sockets) {
2549     _no_of_sockets = 1;
2550   }
2551 
2552   // estimate the number of cores
2553   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2554 }
2555 
2556 
2557 const char* VM_Version::cpu_family_description(void) {
2558   int cpu_family_id = extended_cpu_family();
2559   if (is_amd()) {
2560     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2561       return _family_id_amd[cpu_family_id];
2562     }
2563   }
2564   if (is_intel()) {
2565     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2566       return cpu_model_description();
2567     }
2568     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2569       return _family_id_intel[cpu_family_id];
2570     }
2571   }
2572   if (is_hygon()) {
2573     return "Dhyana";
2574   }
2575   return "Unknown x86";
2576 }
2577 
2578 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2579   assert(buf != NULL, "buffer is NULL!");
2580   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2581 
2582   const char* cpu_type = NULL;
2583   const char* x64 = NULL;
2584 
2585   if (is_intel()) {
2586     cpu_type = "Intel";
2587     x64 = cpu_is_em64t() ? " Intel64" : "";
2588   } else if (is_amd()) {
2589     cpu_type = "AMD";
2590     x64 = cpu_is_em64t() ? " AMD64" : "";
2591   } else if (is_hygon()) {
2592     cpu_type = "Hygon";
2593     x64 = cpu_is_em64t() ? " AMD64" : "";
2594   } else {
2595     cpu_type = "Unknown x86";
2596     x64 = cpu_is_em64t() ? " x86_64" : "";
2597   }
2598 
2599   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2600     cpu_type,
2601     cpu_family_description(),
2602     supports_ht() ? " (HT)" : "",
2603     supports_sse3() ? " SSE3" : "",
2604     supports_ssse3() ? " SSSE3" : "",
2605     supports_sse4_1() ? " SSE4.1" : "",
2606     supports_sse4_2() ? " SSE4.2" : "",
2607     supports_sse4a() ? " SSE4A" : "",
2608     is_netburst() ? " Netburst" : "",
2609     is_intel_family_core() ? " Core" : "",
2610     x64);
2611 
2612   return OS_OK;
2613 }
2614 
2615 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2616   assert(buf != NULL, "buffer is NULL!");
2617   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2618   assert(getCPUIDBrandString_stub != NULL, "not initialized");
2619 
2620   // invoke newly generated asm code to fetch CPU Brand String
2621   getCPUIDBrandString_stub(&_cpuid_info);
2622 
2623   // fetch results into buffer
2624   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2625   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2626   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2627   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2628   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2629   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2630   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2631   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2632   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2633   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2634   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2635   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2636 
2637   return OS_OK;
2638 }
2639 
2640 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2641   guarantee(buf != NULL, "buffer is NULL!");
2642   guarantee(buf_len > 0, "buffer len not enough!");
2643 
2644   unsigned int flag = 0;
2645   unsigned int fi = 0;
2646   size_t       written = 0;
2647   const char*  prefix = "";
2648 
2649 #define WRITE_TO_BUF(string)                                                          \
2650   {                                                                                   \
2651     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2652     if (res < 0) {                                                                    \
2653       return buf_len - 1;                                                             \
2654     }                                                                                 \
2655     written += res;                                                                   \
2656     if (prefix[0] == '\0') {                                                          \
2657       prefix = ", ";                                                                  \
2658     }                                                                                 \
2659   }
2660 
2661   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2662     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2663       continue; /* no hyperthreading */
2664     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2665       continue; /* no fast system call */
2666     }
2667     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2668       WRITE_TO_BUF(_feature_edx_id[fi]);
2669     }
2670   }
2671 
2672   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2673     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2674       WRITE_TO_BUF(_feature_ecx_id[fi]);
2675     }
2676   }
2677 
2678   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2679     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2680       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2681     }
2682   }
2683 
2684   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2685     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2686       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2687     }
2688   }
2689 
2690   if (supports_tscinv_bit()) {
2691       WRITE_TO_BUF("Invariant TSC");
2692   }
2693 
2694   return written;
2695 }
2696 
2697 /**
2698  * Write a detailed description of the cpu to a given buffer, including
2699  * feature set.
2700  */
2701 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2702   assert(buf != NULL, "buffer is NULL!");
2703   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2704 
2705   static const char* unknown = "<unknown>";
2706   char               vendor_id[VENDOR_LENGTH];
2707   const char*        family = NULL;
2708   const char*        model = NULL;
2709   const char*        brand = NULL;
2710   int                outputLen = 0;
2711 
2712   family = cpu_family_description();
2713   if (family == NULL) {
2714     family = unknown;
2715   }
2716 
2717   model = cpu_model_description();
2718   if (model == NULL) {
2719     model = unknown;
2720   }
2721 
2722   brand = cpu_brand_string();
2723 
2724   if (brand == NULL) {
2725     brand = cpu_brand();
2726     if (brand == NULL) {
2727       brand = unknown;
2728     }
2729   }
2730 
2731   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2732   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2733   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2734   vendor_id[VENDOR_LENGTH-1] = '\0';
2735 
2736   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2737     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2738     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2739     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2740     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2741     "Supports: ",
2742     brand,
2743     vendor_id,
2744     family,
2745     extended_cpu_family(),
2746     model,
2747     extended_cpu_model(),
2748     cpu_stepping(),
2749     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2750     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2751     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2752     _cpuid_info.std_cpuid1_eax.value,
2753     _cpuid_info.std_cpuid1_ebx.value,
2754     _cpuid_info.std_cpuid1_ecx.value,
2755     _cpuid_info.std_cpuid1_edx.value,
2756     _cpuid_info.ext_cpuid1_eax,
2757     _cpuid_info.ext_cpuid1_ebx,
2758     _cpuid_info.ext_cpuid1_ecx,
2759     _cpuid_info.ext_cpuid1_edx);
2760 
2761   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2762     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2763     return OS_ERR;
2764   }
2765 
2766   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2767 
2768   return OS_OK;
2769 }
2770 
2771 
2772 // Fill in Abstract_VM_Version statics
2773 void VM_Version::initialize_cpu_information() {
2774   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2775   assert(!_initialized, "shouldn't be initialized yet");
2776   resolve_cpu_information_details();
2777 
2778   // initialize cpu_name and cpu_desc
2779   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2780   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2781   _initialized = true;
2782 }
2783 
2784 /**
2785  *  For information about extracting the frequency from the cpu brand string, please see:
2786  *
2787  *    Intel Processor Identification and the CPUID Instruction
2788  *    Application Note 485
2789  *    May 2012
2790  *
2791  * The return value is the frequency in Hz.
2792  */
2793 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2794   const char* const brand_string = cpu_brand_string();
2795   if (brand_string == NULL) {
2796     return 0;
2797   }
2798   const int64_t MEGA = 1000000;
2799   int64_t multiplier = 0;
2800   int64_t frequency = 0;
2801   uint8_t idx = 0;
2802   // The brand string buffer is at most 48 bytes.
2803   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2804   for (; idx < 48-2; ++idx) {
2805     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2806     // Search brand string for "yHz" where y is M, G, or T.
2807     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2808       if (brand_string[idx] == 'M') {
2809         multiplier = MEGA;
2810       } else if (brand_string[idx] == 'G') {
2811         multiplier = MEGA * 1000;
2812       } else if (brand_string[idx] == 'T') {
2813         multiplier = MEGA * MEGA;
2814       }
2815       break;
2816     }
2817   }
2818   if (multiplier > 0) {
2819     // Compute frequency (in Hz) from brand string.
2820     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2821       frequency =  (brand_string[idx-4] - '0') * multiplier;
2822       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2823       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2824     } else { // format is "xxxx"
2825       frequency =  (brand_string[idx-4] - '0') * 1000;
2826       frequency += (brand_string[idx-3] - '0') * 100;
2827       frequency += (brand_string[idx-2] - '0') * 10;
2828       frequency += (brand_string[idx-1] - '0');
2829       frequency *= multiplier;
2830     }
2831   }
2832   return frequency;
2833 }
2834 
2835 
2836 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2837   if (_max_qualified_cpu_frequency == 0) {
2838     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2839   }
2840   return _max_qualified_cpu_frequency;
2841 }
2842 
2843 uint64_t VM_Version::feature_flags() {
2844   uint64_t result = 0;
2845   if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
2846     result |= CPU_CX8;
2847   if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
2848     result |= CPU_CMOV;
2849   if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
2850     result |= CPU_FLUSH;
2851 #ifdef _LP64
2852   // clflush should always be available on x86_64
2853   // if not we are in real trouble because we rely on it
2854   // to flush the code cache.
2855   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2856 #endif
2857   if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2858       _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
2859     result |= CPU_FXSR;
2860   // HT flag is set for multi-core processors also.
2861   if (threads_per_core() > 1)
2862     result |= CPU_HT;
2863   if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2864       _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
2865     result |= CPU_MMX;
2866   if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
2867     result |= CPU_SSE;
2868   if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
2869     result |= CPU_SSE2;
2870   if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
2871     result |= CPU_SSE3;
2872   if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
2873     result |= CPU_SSSE3;
2874   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
2875     result |= CPU_SSE4_1;
2876   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
2877     result |= CPU_SSE4_2;
2878   if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
2879     result |= CPU_POPCNT;
2880   if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
2881       _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
2882       _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
2883       _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
2884     result |= CPU_AVX;
2885     result |= CPU_VZEROUPPER;
2886     if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
2887       result |= CPU_F16C;
2888     if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
2889       result |= CPU_AVX2;
2890     if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
2891         _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
2892         _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
2893         _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
2894       result |= CPU_AVX512F;
2895       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
2896         result |= CPU_AVX512CD;
2897       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
2898         result |= CPU_AVX512DQ;
2899       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
2900         result |= CPU_AVX512PF;
2901       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
2902         result |= CPU_AVX512ER;
2903       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
2904         result |= CPU_AVX512BW;
2905       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
2906         result |= CPU_AVX512VL;
2907       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2908         result |= CPU_AVX512_VPOPCNTDQ;
2909       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2910         result |= CPU_AVX512_VPCLMULQDQ;
2911       if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
2912         result |= CPU_AVX512_VAES;
2913       if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
2914         result |= CPU_GFNI;
2915       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
2916         result |= CPU_AVX512_VNNI;
2917       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2918         result |= CPU_AVX512_BITALG;
2919       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2920         result |= CPU_AVX512_VBMI;
2921       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2922         result |= CPU_AVX512_VBMI2;
2923     }
2924   }
2925   if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
2926     result |= CPU_HV;
2927   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
2928     result |= CPU_BMI1;
2929   if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
2930     result |= CPU_TSC;
2931   if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
2932     result |= CPU_TSCINV_BIT;
2933   if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
2934     result |= CPU_AES;
2935   if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
2936     result |= CPU_ERMS;
2937   if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2938     result |= CPU_FSRM;
2939   if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
2940     result |= CPU_CLMUL;
2941   if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
2942     result |= CPU_RTM;
2943   if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
2944      result |= CPU_ADX;
2945   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
2946     result |= CPU_BMI2;
2947   if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
2948     result |= CPU_SHA;
2949   if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
2950     result |= CPU_FMA;
2951   if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
2952     result |= CPU_FLUSHOPT;
2953   if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
2954     result |= CPU_RDTSCP;
2955   if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
2956     result |= CPU_RDPID;
2957 
2958   // AMD|Hygon features.
2959   if (is_amd_family()) {
2960     if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
2961         (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
2962       result |= CPU_3DNOW_PREFETCH;
2963     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
2964       result |= CPU_LZCNT;
2965     if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
2966       result |= CPU_SSE4A;
2967   }
2968 
2969   // Intel features.
2970   if (is_intel()) {
2971     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
2972       result |= CPU_LZCNT;
2973     }
2974     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
2975       result |= CPU_3DNOW_PREFETCH;
2976     }
2977     if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
2978       result |= CPU_CLWB;
2979     }
2980     if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
2981       result |= CPU_SERIALIZE;
2982   }
2983 
2984   // ZX features.
2985   if (is_zx()) {
2986     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
2987       result |= CPU_LZCNT;
2988     }
2989     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
2990       result |= CPU_3DNOW_PREFETCH;
2991     }
2992   }
2993 
2994   // Protection key features.
2995   if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) {
2996     result |= CPU_PKU;
2997   }
2998   if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) {
2999     result |= CPU_OSPKE;
3000   }
3001 
3002   // Control flow enforcement (CET) features.
3003   if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) {
3004     result |= CPU_CET_SS;
3005   }
3006   if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) {
3007     result |= CPU_CET_IBT;
3008   }
3009 
3010   // Composite features.
3011   if (supports_tscinv_bit() &&
3012       ((is_amd_family() && !is_amd_Barcelona()) ||
3013        is_intel_tsc_synched_at_init())) {
3014     result |= CPU_TSCINV;
3015   }
3016 
3017   return result;
3018 }
3019 
3020 bool VM_Version::os_supports_avx_vectors() {
3021   bool retVal = false;
3022   int nreg = 2 LP64_ONLY(+2);
3023   if (supports_evex()) {
3024     // Verify that OS save/restore all bits of EVEX registers
3025     // during signal processing.
3026     retVal = true;
3027     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3028       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3029         retVal = false;
3030         break;
3031       }
3032     }
3033   } else if (supports_avx()) {
3034     // Verify that OS save/restore all bits of AVX registers
3035     // during signal processing.
3036     retVal = true;
3037     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3038       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3039         retVal = false;
3040         break;
3041       }
3042     }
3043     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3044     if (retVal == false) {
3045       // Verify that OS save/restore all bits of EVEX registers
3046       // during signal processing.
3047       retVal = true;
3048       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3049         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3050           retVal = false;
3051           break;
3052         }
3053       }
3054     }
3055   }
3056   return retVal;
3057 }
3058 
3059 uint VM_Version::cores_per_cpu() {
3060   uint result = 1;
3061   if (is_intel()) {
3062     bool supports_topology = supports_processor_topology();
3063     if (supports_topology) {
3064       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3065                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3066     }
3067     if (!supports_topology || result == 0) {
3068       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3069     }
3070   } else if (is_amd_family()) {
3071     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3072   } else if (is_zx()) {
3073     bool supports_topology = supports_processor_topology();
3074     if (supports_topology) {
3075       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3076                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3077     }
3078     if (!supports_topology || result == 0) {
3079       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3080     }
3081   }
3082   return result;
3083 }
3084 
3085 uint VM_Version::threads_per_core() {
3086   uint result = 1;
3087   if (is_intel() && supports_processor_topology()) {
3088     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3089   } else if (is_zx() && supports_processor_topology()) {
3090     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3091   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3092     if (cpu_family() >= 0x17) {
3093       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3094     } else {
3095       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3096                  cores_per_cpu();
3097     }
3098   }
3099   return (result == 0 ? 1 : result);
3100 }
3101 
3102 intx VM_Version::L1_line_size() {
3103   intx result = 0;
3104   if (is_intel()) {
3105     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3106   } else if (is_amd_family()) {
3107     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3108   } else if (is_zx()) {
3109     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3110   }
3111   if (result < 32) // not defined ?
3112     result = 32;   // 32 bytes by default on x86 and other x64
3113   return result;
3114 }
3115 
3116 bool VM_Version::is_intel_tsc_synched_at_init() {
3117   if (is_intel_family_core()) {
3118     uint32_t ext_model = extended_cpu_model();
3119     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3120         ext_model == CPU_MODEL_WESTMERE_EP    ||
3121         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3122         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3123       // <= 2-socket invariant tsc support. EX versions are usually used
3124       // in > 2-socket systems and likely don't synchronize tscs at
3125       // initialization.
3126       // Code that uses tsc values must be prepared for them to arbitrarily
3127       // jump forward or backward.
3128       return true;
3129     }
3130   }
3131   return false;
3132 }
3133 
3134 intx VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3135   // Hardware prefetching (distance/size in bytes):
3136   // Pentium 3 -  64 /  32
3137   // Pentium 4 - 256 / 128
3138   // Athlon    -  64 /  32 ????
3139   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3140   // Core      - 128 /  64
3141   //
3142   // Software prefetching (distance in bytes / instruction with best score):
3143   // Pentium 3 - 128 / prefetchnta
3144   // Pentium 4 - 512 / prefetchnta
3145   // Athlon    - 128 / prefetchnta
3146   // Opteron   - 256 / prefetchnta
3147   // Core      - 256 / prefetchnta
3148   // It will be used only when AllocatePrefetchStyle > 0
3149 
3150   if (is_amd_family()) { // AMD | Hygon
3151     if (supports_sse2()) {
3152       return 256; // Opteron
3153     } else {
3154       return 128; // Athlon
3155     }
3156   } else { // Intel
3157     if (supports_sse3() && cpu_family() == 6) {
3158       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3159         return 192;
3160       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3161 #ifdef _LP64
3162         return 384;
3163 #else
3164         return 320;
3165 #endif
3166       }
3167     }
3168     if (supports_sse2()) {
3169       if (cpu_family() == 6) {
3170         return 256; // Pentium M, Core, Core2
3171       } else {
3172         return 512; // Pentium 4
3173       }
3174     } else {
3175       return 128; // Pentium 3 (and all other old CPUs)
3176     }
3177   }
3178 }