1 /*
   2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "code/codeBlob.hpp"
  29 #include "compiler/compilerDefinitions.inline.hpp"
  30 #include "jvm.h"
  31 #include "logging/log.hpp"
  32 #include "logging/logStream.hpp"
  33 #include "memory/resourceArea.hpp"
  34 #include "memory/universe.hpp"
  35 #include "runtime/globals_extension.hpp"
  36 #include "runtime/java.hpp"
  37 #include "runtime/os.inline.hpp"
  38 #include "runtime/stubCodeGenerator.hpp"
  39 #include "runtime/vm_version.hpp"
  40 #include "utilities/powerOfTwo.hpp"
  41 #include "utilities/virtualizationSupport.hpp"
  42 
  43 int VM_Version::_cpu;
  44 int VM_Version::_model;
  45 int VM_Version::_stepping;
  46 bool VM_Version::_has_intel_jcc_erratum;
  47 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  48 
  49 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  50 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  51 #undef DECLARE_CPU_FEATURE_FLAG
  52 
  53 // Address of instruction which causes SEGV
  54 address VM_Version::_cpuinfo_segv_addr = 0;
  55 // Address of instruction after the one which causes SEGV
  56 address VM_Version::_cpuinfo_cont_addr = 0;
  57 
  58 static BufferBlob* stub_blob;
  59 static const int stub_size = 2000;
  60 
  61 extern "C" {
  62   typedef void (*get_cpu_info_stub_t)(void*);
  63   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  64 }
  65 static get_cpu_info_stub_t get_cpu_info_stub = NULL;
  66 static detect_virt_stub_t detect_virt_stub = NULL;
  67 
  68 #ifdef _LP64
  69 
  70 bool VM_Version::supports_clflush() {
  71   // clflush should always be available on x86_64
  72   // if not we are in real trouble because we rely on it
  73   // to flush the code cache.
  74   // Unfortunately, Assembler::clflush is currently called as part
  75   // of generation of the code cache flush routine. This happens
  76   // under Universe::init before the processor features are set
  77   // up. Assembler::flush calls this routine to check that clflush
  78   // is allowed. So, we give the caller a free pass if Universe init
  79   // is still in progress.
  80   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  81   return true;
  82 }
  83 #endif
  84 
  85 #define CPUID_STANDARD_FN   0x0
  86 #define CPUID_STANDARD_FN_1 0x1
  87 #define CPUID_STANDARD_FN_4 0x4
  88 #define CPUID_STANDARD_FN_B 0xb
  89 
  90 #define CPUID_EXTENDED_FN   0x80000000
  91 #define CPUID_EXTENDED_FN_1 0x80000001
  92 #define CPUID_EXTENDED_FN_2 0x80000002
  93 #define CPUID_EXTENDED_FN_3 0x80000003
  94 #define CPUID_EXTENDED_FN_4 0x80000004
  95 #define CPUID_EXTENDED_FN_7 0x80000007
  96 #define CPUID_EXTENDED_FN_8 0x80000008
  97 
  98 class VM_Version_StubGenerator: public StubCodeGenerator {
  99  public:
 100 
 101   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 102 
 103   address generate_get_cpu_info() {
 104     // Flags to test CPU type.
 105     const uint32_t HS_EFL_AC = 0x40000;
 106     const uint32_t HS_EFL_ID = 0x200000;
 107     // Values for when we don't have a CPUID instruction.
 108     const int      CPU_FAMILY_SHIFT = 8;
 109     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 110     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 111     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 112 
 113     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 114     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 115     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 116 
 117     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 118 #   define __ _masm->
 119 
 120     address start = __ pc();
 121 
 122     //
 123     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 124     //
 125     // LP64: rcx and rdx are first and second argument registers on windows
 126 
 127     __ push(rbp);
 128 #ifdef _LP64
 129     __ mov(rbp, c_rarg0); // cpuid_info address
 130 #else
 131     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 132 #endif
 133     __ push(rbx);
 134     __ push(rsi);
 135     __ pushf();          // preserve rbx, and flags
 136     __ pop(rax);
 137     __ push(rax);
 138     __ mov(rcx, rax);
 139     //
 140     // if we are unable to change the AC flag, we have a 386
 141     //
 142     __ xorl(rax, HS_EFL_AC);
 143     __ push(rax);
 144     __ popf();
 145     __ pushf();
 146     __ pop(rax);
 147     __ cmpptr(rax, rcx);
 148     __ jccb(Assembler::notEqual, detect_486);
 149 
 150     __ movl(rax, CPU_FAMILY_386);
 151     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 152     __ jmp(done);
 153 
 154     //
 155     // If we are unable to change the ID flag, we have a 486 which does
 156     // not support the "cpuid" instruction.
 157     //
 158     __ bind(detect_486);
 159     __ mov(rax, rcx);
 160     __ xorl(rax, HS_EFL_ID);
 161     __ push(rax);
 162     __ popf();
 163     __ pushf();
 164     __ pop(rax);
 165     __ cmpptr(rcx, rax);
 166     __ jccb(Assembler::notEqual, detect_586);
 167 
 168     __ bind(cpu486);
 169     __ movl(rax, CPU_FAMILY_486);
 170     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 171     __ jmp(done);
 172 
 173     //
 174     // At this point, we have a chip which supports the "cpuid" instruction
 175     //
 176     __ bind(detect_586);
 177     __ xorl(rax, rax);
 178     __ cpuid();
 179     __ orl(rax, rax);
 180     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 181                                         // value of at least 1, we give up and
 182                                         // assume a 486
 183     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 184     __ movl(Address(rsi, 0), rax);
 185     __ movl(Address(rsi, 4), rbx);
 186     __ movl(Address(rsi, 8), rcx);
 187     __ movl(Address(rsi,12), rdx);
 188 
 189     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 190     __ jccb(Assembler::belowEqual, std_cpuid4);
 191 
 192     //
 193     // cpuid(0xB) Processor Topology
 194     //
 195     __ movl(rax, 0xb);
 196     __ xorl(rcx, rcx);   // Threads level
 197     __ cpuid();
 198 
 199     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 200     __ movl(Address(rsi, 0), rax);
 201     __ movl(Address(rsi, 4), rbx);
 202     __ movl(Address(rsi, 8), rcx);
 203     __ movl(Address(rsi,12), rdx);
 204 
 205     __ movl(rax, 0xb);
 206     __ movl(rcx, 1);     // Cores level
 207     __ cpuid();
 208     __ push(rax);
 209     __ andl(rax, 0x1f);  // Determine if valid topology level
 210     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 211     __ andl(rax, 0xffff);
 212     __ pop(rax);
 213     __ jccb(Assembler::equal, std_cpuid4);
 214 
 215     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 216     __ movl(Address(rsi, 0), rax);
 217     __ movl(Address(rsi, 4), rbx);
 218     __ movl(Address(rsi, 8), rcx);
 219     __ movl(Address(rsi,12), rdx);
 220 
 221     __ movl(rax, 0xb);
 222     __ movl(rcx, 2);     // Packages level
 223     __ cpuid();
 224     __ push(rax);
 225     __ andl(rax, 0x1f);  // Determine if valid topology level
 226     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 227     __ andl(rax, 0xffff);
 228     __ pop(rax);
 229     __ jccb(Assembler::equal, std_cpuid4);
 230 
 231     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 232     __ movl(Address(rsi, 0), rax);
 233     __ movl(Address(rsi, 4), rbx);
 234     __ movl(Address(rsi, 8), rcx);
 235     __ movl(Address(rsi,12), rdx);
 236 
 237     //
 238     // cpuid(0x4) Deterministic cache params
 239     //
 240     __ bind(std_cpuid4);
 241     __ movl(rax, 4);
 242     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 243     __ jccb(Assembler::greater, std_cpuid1);
 244 
 245     __ xorl(rcx, rcx);   // L1 cache
 246     __ cpuid();
 247     __ push(rax);
 248     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 249     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 250     __ pop(rax);
 251     __ jccb(Assembler::equal, std_cpuid1);
 252 
 253     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 254     __ movl(Address(rsi, 0), rax);
 255     __ movl(Address(rsi, 4), rbx);
 256     __ movl(Address(rsi, 8), rcx);
 257     __ movl(Address(rsi,12), rdx);
 258 
 259     //
 260     // Standard cpuid(0x1)
 261     //
 262     __ bind(std_cpuid1);
 263     __ movl(rax, 1);
 264     __ cpuid();
 265     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 266     __ movl(Address(rsi, 0), rax);
 267     __ movl(Address(rsi, 4), rbx);
 268     __ movl(Address(rsi, 8), rcx);
 269     __ movl(Address(rsi,12), rdx);
 270 
 271     //
 272     // Check if OS has enabled XGETBV instruction to access XCR0
 273     // (OSXSAVE feature flag) and CPU supports AVX
 274     //
 275     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 276     __ cmpl(rcx, 0x18000000);
 277     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 278 
 279     //
 280     // XCR0, XFEATURE_ENABLED_MASK register
 281     //
 282     __ xorl(rcx, rcx);   // zero for XCR0 register
 283     __ xgetbv();
 284     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 285     __ movl(Address(rsi, 0), rax);
 286     __ movl(Address(rsi, 4), rdx);
 287 
 288     //
 289     // cpuid(0x7) Structured Extended Features
 290     //
 291     __ bind(sef_cpuid);
 292     __ movl(rax, 7);
 293     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 294     __ jccb(Assembler::greater, ext_cpuid);
 295 
 296     __ xorl(rcx, rcx);
 297     __ cpuid();
 298     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 299     __ movl(Address(rsi, 0), rax);
 300     __ movl(Address(rsi, 4), rbx);
 301     __ movl(Address(rsi, 8), rcx);
 302     __ movl(Address(rsi, 12), rdx);
 303 
 304     //
 305     // Extended cpuid(0x80000000)
 306     //
 307     __ bind(ext_cpuid);
 308     __ movl(rax, 0x80000000);
 309     __ cpuid();
 310     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 311     __ jcc(Assembler::belowEqual, done);
 312     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 313     __ jcc(Assembler::belowEqual, ext_cpuid1);
 314     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 315     __ jccb(Assembler::belowEqual, ext_cpuid5);
 316     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 317     __ jccb(Assembler::belowEqual, ext_cpuid7);
 318     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 319     __ jccb(Assembler::belowEqual, ext_cpuid8);
 320     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 321     __ jccb(Assembler::below, ext_cpuid8);
 322     //
 323     // Extended cpuid(0x8000001E)
 324     //
 325     __ movl(rax, 0x8000001E);
 326     __ cpuid();
 327     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 328     __ movl(Address(rsi, 0), rax);
 329     __ movl(Address(rsi, 4), rbx);
 330     __ movl(Address(rsi, 8), rcx);
 331     __ movl(Address(rsi,12), rdx);
 332 
 333     //
 334     // Extended cpuid(0x80000008)
 335     //
 336     __ bind(ext_cpuid8);
 337     __ movl(rax, 0x80000008);
 338     __ cpuid();
 339     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 340     __ movl(Address(rsi, 0), rax);
 341     __ movl(Address(rsi, 4), rbx);
 342     __ movl(Address(rsi, 8), rcx);
 343     __ movl(Address(rsi,12), rdx);
 344 
 345     //
 346     // Extended cpuid(0x80000007)
 347     //
 348     __ bind(ext_cpuid7);
 349     __ movl(rax, 0x80000007);
 350     __ cpuid();
 351     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 352     __ movl(Address(rsi, 0), rax);
 353     __ movl(Address(rsi, 4), rbx);
 354     __ movl(Address(rsi, 8), rcx);
 355     __ movl(Address(rsi,12), rdx);
 356 
 357     //
 358     // Extended cpuid(0x80000005)
 359     //
 360     __ bind(ext_cpuid5);
 361     __ movl(rax, 0x80000005);
 362     __ cpuid();
 363     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 364     __ movl(Address(rsi, 0), rax);
 365     __ movl(Address(rsi, 4), rbx);
 366     __ movl(Address(rsi, 8), rcx);
 367     __ movl(Address(rsi,12), rdx);
 368 
 369     //
 370     // Extended cpuid(0x80000001)
 371     //
 372     __ bind(ext_cpuid1);
 373     __ movl(rax, 0x80000001);
 374     __ cpuid();
 375     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 376     __ movl(Address(rsi, 0), rax);
 377     __ movl(Address(rsi, 4), rbx);
 378     __ movl(Address(rsi, 8), rcx);
 379     __ movl(Address(rsi,12), rdx);
 380 
 381     //
 382     // Check if OS has enabled XGETBV instruction to access XCR0
 383     // (OSXSAVE feature flag) and CPU supports AVX
 384     //
 385     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 386     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 387     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 388     __ cmpl(rcx, 0x18000000);
 389     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 390 
 391     __ movl(rax, 0x6);
 392     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 393     __ cmpl(rax, 0x6);
 394     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 395 
 396     // we need to bridge farther than imm8, so we use this island as a thunk
 397     __ bind(done);
 398     __ jmp(wrapup);
 399 
 400     __ bind(start_simd_check);
 401     //
 402     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 403     // registers are not restored after a signal processing.
 404     // Generate SEGV here (reference through NULL)
 405     // and check upper YMM/ZMM bits after it.
 406     //
 407     int saved_useavx = UseAVX;
 408     int saved_usesse = UseSSE;
 409 
 410     // If UseAVX is uninitialized or is set by the user to include EVEX
 411     if (use_evex) {
 412       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 413       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 414       __ movl(rax, 0x10000);
 415       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 416       __ cmpl(rax, 0x10000);
 417       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 418       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 419       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 420       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 421       __ movl(rax, 0xE0);
 422       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 423       __ cmpl(rax, 0xE0);
 424       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 425 
 426       if (FLAG_IS_DEFAULT(UseAVX)) {
 427         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 428         __ movl(rax, Address(rsi, 0));
 429         __ cmpl(rax, 0x50654);              // If it is Skylake
 430         __ jcc(Assembler::equal, legacy_setup);
 431       }
 432       // EVEX setup: run in lowest evex mode
 433       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 434       UseAVX = 3;
 435       UseSSE = 2;
 436 #ifdef _WINDOWS
 437       // xmm5-xmm15 are not preserved by caller on windows
 438       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 439       __ subptr(rsp, 64);
 440       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 441 #ifdef _LP64
 442       __ subptr(rsp, 64);
 443       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 444       __ subptr(rsp, 64);
 445       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 446 #endif // _LP64
 447 #endif // _WINDOWS
 448 
 449       // load value into all 64 bytes of zmm7 register
 450       __ movl(rcx, VM_Version::ymm_test_value());
 451       __ movdl(xmm0, rcx);
 452       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 453       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 454 #ifdef _LP64
 455       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 456       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 457 #endif
 458       VM_Version::clean_cpuFeatures();
 459       __ jmp(save_restore_except);
 460     }
 461 
 462     __ bind(legacy_setup);
 463     // AVX setup
 464     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 465     UseAVX = 1;
 466     UseSSE = 2;
 467 #ifdef _WINDOWS
 468     __ subptr(rsp, 32);
 469     __ vmovdqu(Address(rsp, 0), xmm7);
 470 #ifdef _LP64
 471     __ subptr(rsp, 32);
 472     __ vmovdqu(Address(rsp, 0), xmm8);
 473     __ subptr(rsp, 32);
 474     __ vmovdqu(Address(rsp, 0), xmm15);
 475 #endif // _LP64
 476 #endif // _WINDOWS
 477 
 478     // load value into all 32 bytes of ymm7 register
 479     __ movl(rcx, VM_Version::ymm_test_value());
 480 
 481     __ movdl(xmm0, rcx);
 482     __ pshufd(xmm0, xmm0, 0x00);
 483     __ vinsertf128_high(xmm0, xmm0);
 484     __ vmovdqu(xmm7, xmm0);
 485 #ifdef _LP64
 486     __ vmovdqu(xmm8, xmm0);
 487     __ vmovdqu(xmm15, xmm0);
 488 #endif
 489     VM_Version::clean_cpuFeatures();
 490 
 491     __ bind(save_restore_except);
 492     __ xorl(rsi, rsi);
 493     VM_Version::set_cpuinfo_segv_addr(__ pc());
 494     // Generate SEGV
 495     __ movl(rax, Address(rsi, 0));
 496 
 497     VM_Version::set_cpuinfo_cont_addr(__ pc());
 498     // Returns here after signal. Save xmm0 to check it later.
 499 
 500     // If UseAVX is uninitialized or is set by the user to include EVEX
 501     if (use_evex) {
 502       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 503       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 504       __ movl(rax, 0x10000);
 505       __ andl(rax, Address(rsi, 4));
 506       __ cmpl(rax, 0x10000);
 507       __ jcc(Assembler::notEqual, legacy_save_restore);
 508       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 509       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 510       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 511       __ movl(rax, 0xE0);
 512       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 513       __ cmpl(rax, 0xE0);
 514       __ jcc(Assembler::notEqual, legacy_save_restore);
 515 
 516       if (FLAG_IS_DEFAULT(UseAVX)) {
 517         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 518         __ movl(rax, Address(rsi, 0));
 519         __ cmpl(rax, 0x50654);              // If it is Skylake
 520         __ jcc(Assembler::equal, legacy_save_restore);
 521       }
 522       // EVEX check: run in lowest evex mode
 523       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 524       UseAVX = 3;
 525       UseSSE = 2;
 526       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 527       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 528       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 529 #ifdef _LP64
 530       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 531       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 532 #endif
 533 
 534 #ifdef _WINDOWS
 535 #ifdef _LP64
 536       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 537       __ addptr(rsp, 64);
 538       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 539       __ addptr(rsp, 64);
 540 #endif // _LP64
 541       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 542       __ addptr(rsp, 64);
 543 #endif // _WINDOWS
 544       generate_vzeroupper(wrapup);
 545       VM_Version::clean_cpuFeatures();
 546       UseAVX = saved_useavx;
 547       UseSSE = saved_usesse;
 548       __ jmp(wrapup);
 549    }
 550 
 551     __ bind(legacy_save_restore);
 552     // AVX check
 553     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 554     UseAVX = 1;
 555     UseSSE = 2;
 556     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 557     __ vmovdqu(Address(rsi, 0), xmm0);
 558     __ vmovdqu(Address(rsi, 32), xmm7);
 559 #ifdef _LP64
 560     __ vmovdqu(Address(rsi, 64), xmm8);
 561     __ vmovdqu(Address(rsi, 96), xmm15);
 562 #endif
 563 
 564 #ifdef _WINDOWS
 565 #ifdef _LP64
 566     __ vmovdqu(xmm15, Address(rsp, 0));
 567     __ addptr(rsp, 32);
 568     __ vmovdqu(xmm8, Address(rsp, 0));
 569     __ addptr(rsp, 32);
 570 #endif // _LP64
 571     __ vmovdqu(xmm7, Address(rsp, 0));
 572     __ addptr(rsp, 32);
 573 #endif // _WINDOWS
 574     generate_vzeroupper(wrapup);
 575     VM_Version::clean_cpuFeatures();
 576     UseAVX = saved_useavx;
 577     UseSSE = saved_usesse;
 578 
 579     __ bind(wrapup);
 580     __ popf();
 581     __ pop(rsi);
 582     __ pop(rbx);
 583     __ pop(rbp);
 584     __ ret(0);
 585 
 586 #   undef __
 587 
 588     return start;
 589   };
 590   void generate_vzeroupper(Label& L_wrapup) {
 591 #   define __ _masm->
 592     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 593     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 594     __ jcc(Assembler::notEqual, L_wrapup);
 595     __ movl(rcx, 0x0FFF0FF0);
 596     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 597     __ andl(rcx, Address(rsi, 0));
 598     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 599     __ jcc(Assembler::equal, L_wrapup);
 600     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 601     __ jcc(Assembler::equal, L_wrapup);
 602     // vzeroupper() will use a pre-computed instruction sequence that we
 603     // can't compute until after we've determined CPU capabilities. Use
 604     // uncached variant here directly to be able to bootstrap correctly
 605     __ vzeroupper_uncached();
 606 #   undef __
 607   }
 608   address generate_detect_virt() {
 609     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 610 #   define __ _masm->
 611 
 612     address start = __ pc();
 613 
 614     // Evacuate callee-saved registers
 615     __ push(rbp);
 616     __ push(rbx);
 617     __ push(rsi); // for Windows
 618 
 619 #ifdef _LP64
 620     __ mov(rax, c_rarg0); // CPUID leaf
 621     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 622 #else
 623     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 624     __ movptr(rsi, Address(rsp, 20)); // register array address
 625 #endif
 626 
 627     __ cpuid();
 628 
 629     // Store result to register array
 630     __ movl(Address(rsi,  0), rax);
 631     __ movl(Address(rsi,  4), rbx);
 632     __ movl(Address(rsi,  8), rcx);
 633     __ movl(Address(rsi, 12), rdx);
 634 
 635     // Epilogue
 636     __ pop(rsi);
 637     __ pop(rbx);
 638     __ pop(rbp);
 639     __ ret(0);
 640 
 641 #   undef __
 642 
 643     return start;
 644   };
 645 
 646 
 647   address generate_getCPUIDBrandString(void) {
 648     // Flags to test CPU type.
 649     const uint32_t HS_EFL_AC           = 0x40000;
 650     const uint32_t HS_EFL_ID           = 0x200000;
 651     // Values for when we don't have a CPUID instruction.
 652     const int      CPU_FAMILY_SHIFT = 8;
 653     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 654     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 655 
 656     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 657 
 658     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 659 #   define __ _masm->
 660 
 661     address start = __ pc();
 662 
 663     //
 664     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 665     //
 666     // LP64: rcx and rdx are first and second argument registers on windows
 667 
 668     __ push(rbp);
 669 #ifdef _LP64
 670     __ mov(rbp, c_rarg0); // cpuid_info address
 671 #else
 672     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 673 #endif
 674     __ push(rbx);
 675     __ push(rsi);
 676     __ pushf();          // preserve rbx, and flags
 677     __ pop(rax);
 678     __ push(rax);
 679     __ mov(rcx, rax);
 680     //
 681     // if we are unable to change the AC flag, we have a 386
 682     //
 683     __ xorl(rax, HS_EFL_AC);
 684     __ push(rax);
 685     __ popf();
 686     __ pushf();
 687     __ pop(rax);
 688     __ cmpptr(rax, rcx);
 689     __ jccb(Assembler::notEqual, detect_486);
 690 
 691     __ movl(rax, CPU_FAMILY_386);
 692     __ jmp(done);
 693 
 694     //
 695     // If we are unable to change the ID flag, we have a 486 which does
 696     // not support the "cpuid" instruction.
 697     //
 698     __ bind(detect_486);
 699     __ mov(rax, rcx);
 700     __ xorl(rax, HS_EFL_ID);
 701     __ push(rax);
 702     __ popf();
 703     __ pushf();
 704     __ pop(rax);
 705     __ cmpptr(rcx, rax);
 706     __ jccb(Assembler::notEqual, detect_586);
 707 
 708     __ bind(cpu486);
 709     __ movl(rax, CPU_FAMILY_486);
 710     __ jmp(done);
 711 
 712     //
 713     // At this point, we have a chip which supports the "cpuid" instruction
 714     //
 715     __ bind(detect_586);
 716     __ xorl(rax, rax);
 717     __ cpuid();
 718     __ orl(rax, rax);
 719     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 720                                         // value of at least 1, we give up and
 721                                         // assume a 486
 722 
 723     //
 724     // Extended cpuid(0x80000000) for processor brand string detection
 725     //
 726     __ bind(ext_cpuid);
 727     __ movl(rax, CPUID_EXTENDED_FN);
 728     __ cpuid();
 729     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 730     __ jcc(Assembler::below, done);
 731 
 732     //
 733     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 734     //
 735     __ movl(rax, CPUID_EXTENDED_FN_2);
 736     __ cpuid();
 737     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 738     __ movl(Address(rsi, 0), rax);
 739     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 740     __ movl(Address(rsi, 0), rbx);
 741     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 742     __ movl(Address(rsi, 0), rcx);
 743     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 744     __ movl(Address(rsi,0), rdx);
 745 
 746     //
 747     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 748     //
 749     __ movl(rax, CPUID_EXTENDED_FN_3);
 750     __ cpuid();
 751     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 752     __ movl(Address(rsi, 0), rax);
 753     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 754     __ movl(Address(rsi, 0), rbx);
 755     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 756     __ movl(Address(rsi, 0), rcx);
 757     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 758     __ movl(Address(rsi,0), rdx);
 759 
 760     //
 761     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 762     //
 763     __ movl(rax, CPUID_EXTENDED_FN_4);
 764     __ cpuid();
 765     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 766     __ movl(Address(rsi, 0), rax);
 767     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 768     __ movl(Address(rsi, 0), rbx);
 769     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 770     __ movl(Address(rsi, 0), rcx);
 771     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 772     __ movl(Address(rsi,0), rdx);
 773 
 774     //
 775     // return
 776     //
 777     __ bind(done);
 778     __ popf();
 779     __ pop(rsi);
 780     __ pop(rbx);
 781     __ pop(rbp);
 782     __ ret(0);
 783 
 784 #   undef __
 785 
 786     return start;
 787   };
 788 };
 789 
 790 void VM_Version::get_processor_features() {
 791 
 792   _cpu = 4; // 486 by default
 793   _model = 0;
 794   _stepping = 0;
 795   _features = 0;
 796   _logical_processors_per_package = 1;
 797   // i486 internal cache is both I&D and has a 16-byte line size
 798   _L1_data_cache_line_size = 16;
 799 
 800   // Get raw processor info
 801 
 802   get_cpu_info_stub(&_cpuid_info);
 803 
 804   assert_is_initialized();
 805   _cpu = extended_cpu_family();
 806   _model = extended_cpu_model();
 807   _stepping = cpu_stepping();
 808 
 809   if (cpu_family() > 4) { // it supports CPUID
 810     _features = feature_flags();
 811     // Logical processors are only available on P4s and above,
 812     // and only if hyperthreading is available.
 813     _logical_processors_per_package = logical_processor_count();
 814     _L1_data_cache_line_size = L1_line_size();
 815   }
 816 
 817   _supports_cx8 = supports_cmpxchg8();
 818   // xchg and xadd instructions
 819   _supports_atomic_getset4 = true;
 820   _supports_atomic_getadd4 = true;
 821   LP64_ONLY(_supports_atomic_getset8 = true);
 822   LP64_ONLY(_supports_atomic_getadd8 = true);
 823 
 824 #ifdef _LP64
 825   // OS should support SSE for x64 and hardware should support at least SSE2.
 826   if (!VM_Version::supports_sse2()) {
 827     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 828   }
 829   // in 64 bit the use of SSE2 is the minimum
 830   if (UseSSE < 2) UseSSE = 2;
 831 #endif
 832 
 833 #ifdef AMD64
 834   // flush_icache_stub have to be generated first.
 835   // That is why Icache line size is hard coded in ICache class,
 836   // see icache_x86.hpp. It is also the reason why we can't use
 837   // clflush instruction in 32-bit VM since it could be running
 838   // on CPU which does not support it.
 839   //
 840   // The only thing we can do is to verify that flushed
 841   // ICache::line_size has correct value.
 842   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 843   // clflush_size is size in quadwords (8 bytes).
 844   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 845 #endif
 846 
 847 #ifdef _LP64
 848   // assigning this field effectively enables Unsafe.writebackMemory()
 849   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 850   // that is only implemented on x86_64 and only if the OS plays ball
 851   if (os::supports_map_sync()) {
 852     // publish data cache line flush size to generic field, otherwise
 853     // let if default to zero thereby disabling writeback
 854     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 855   }
 856 #endif
 857 
 858   if (UseSSE < 4) {
 859     _features &= ~CPU_SSE4_1;
 860     _features &= ~CPU_SSE4_2;
 861   }
 862 
 863   if (UseSSE < 3) {
 864     _features &= ~CPU_SSE3;
 865     _features &= ~CPU_SSSE3;
 866     _features &= ~CPU_SSE4A;
 867   }
 868 
 869   if (UseSSE < 2)
 870     _features &= ~CPU_SSE2;
 871 
 872   if (UseSSE < 1)
 873     _features &= ~CPU_SSE;
 874 
 875   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 876   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 877     UseAVX = 0;
 878   }
 879 
 880   // UseSSE is set to the smaller of what hardware supports and what
 881   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 882   // older Pentiums which do not support it.
 883   int use_sse_limit = 0;
 884   if (UseSSE > 0) {
 885     if (UseSSE > 3 && supports_sse4_1()) {
 886       use_sse_limit = 4;
 887     } else if (UseSSE > 2 && supports_sse3()) {
 888       use_sse_limit = 3;
 889     } else if (UseSSE > 1 && supports_sse2()) {
 890       use_sse_limit = 2;
 891     } else if (UseSSE > 0 && supports_sse()) {
 892       use_sse_limit = 1;
 893     } else {
 894       use_sse_limit = 0;
 895     }
 896   }
 897   if (FLAG_IS_DEFAULT(UseSSE)) {
 898     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 899   } else if (UseSSE > use_sse_limit) {
 900     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 901     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 902   }
 903 
 904   // first try initial setting and detect what we can support
 905   int use_avx_limit = 0;
 906   if (UseAVX > 0) {
 907     if (UseSSE < 4) {
 908       // Don't use AVX if SSE is unavailable or has been disabled.
 909       use_avx_limit = 0;
 910     } else if (UseAVX > 2 && supports_evex()) {
 911       use_avx_limit = 3;
 912     } else if (UseAVX > 1 && supports_avx2()) {
 913       use_avx_limit = 2;
 914     } else if (UseAVX > 0 && supports_avx()) {
 915       use_avx_limit = 1;
 916     } else {
 917       use_avx_limit = 0;
 918     }
 919   }
 920   if (FLAG_IS_DEFAULT(UseAVX)) {
 921     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 922     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 923       FLAG_SET_DEFAULT(UseAVX, 2);
 924     } else {
 925       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 926     }
 927   }
 928   if (UseAVX > use_avx_limit) {
 929     if (UseSSE < 4) {
 930       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 931     } else {
 932       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 933     }
 934     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 935   }
 936 
 937   if (UseAVX < 3) {
 938     _features &= ~CPU_AVX512F;
 939     _features &= ~CPU_AVX512DQ;
 940     _features &= ~CPU_AVX512CD;
 941     _features &= ~CPU_AVX512BW;
 942     _features &= ~CPU_AVX512VL;
 943     _features &= ~CPU_AVX512_VPOPCNTDQ;
 944     _features &= ~CPU_AVX512_VPCLMULQDQ;
 945     _features &= ~CPU_AVX512_VAES;
 946     _features &= ~CPU_AVX512_VNNI;
 947     _features &= ~CPU_AVX512_VBMI;
 948     _features &= ~CPU_AVX512_VBMI2;
 949     _features &= ~CPU_AVX512_BITALG;
 950     _features &= ~CPU_AVX512_IFMA;
 951   }
 952 
 953   if (UseAVX < 2)
 954     _features &= ~CPU_AVX2;
 955 
 956   if (UseAVX < 1) {
 957     _features &= ~CPU_AVX;
 958     _features &= ~CPU_VZEROUPPER;
 959     _features &= ~CPU_F16C;
 960   }
 961 
 962   if (logical_processors_per_package() == 1) {
 963     // HT processor could be installed on a system which doesn't support HT.
 964     _features &= ~CPU_HT;
 965   }
 966 
 967   if (is_intel()) { // Intel cpus specific settings
 968     if (is_knights_family()) {
 969       _features &= ~CPU_VZEROUPPER;
 970       _features &= ~CPU_AVX512BW;
 971       _features &= ~CPU_AVX512VL;
 972       _features &= ~CPU_AVX512DQ;
 973       _features &= ~CPU_AVX512_VNNI;
 974       _features &= ~CPU_AVX512_VAES;
 975       _features &= ~CPU_AVX512_VPOPCNTDQ;
 976       _features &= ~CPU_AVX512_VPCLMULQDQ;
 977       _features &= ~CPU_AVX512_VBMI;
 978       _features &= ~CPU_AVX512_VBMI2;
 979       _features &= ~CPU_CLWB;
 980       _features &= ~CPU_FLUSHOPT;
 981       _features &= ~CPU_GFNI;
 982       _features &= ~CPU_AVX512_BITALG;
 983       _features &= ~CPU_AVX512_IFMA;
 984     }
 985   }
 986 
 987   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
 988     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
 989   } else {
 990     _has_intel_jcc_erratum = IntelJccErratumMitigation;
 991   }
 992 
 993   char buf[1024];
 994   int res = jio_snprintf(
 995               buf, sizeof(buf),
 996               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
 997               cores_per_cpu(), threads_per_core(),
 998               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
 999   assert(res > 0, "not enough temporary space allocated");
1000   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1001 
1002   _features_string = os::strdup(buf);
1003 
1004   // Use AES instructions if available.
1005   if (supports_aes()) {
1006     if (FLAG_IS_DEFAULT(UseAES)) {
1007       FLAG_SET_DEFAULT(UseAES, true);
1008     }
1009     if (!UseAES) {
1010       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1011         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1012       }
1013       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1014     } else {
1015       if (UseSSE > 2) {
1016         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1017           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1018         }
1019       } else {
1020         // The AES intrinsic stubs require AES instruction support (of course)
1021         // but also require sse3 mode or higher for instructions it use.
1022         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1023           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1024         }
1025         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1026       }
1027 
1028       // --AES-CTR begins--
1029       if (!UseAESIntrinsics) {
1030         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1031           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1032           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1033         }
1034       } else {
1035         if (supports_sse4_1()) {
1036           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1037             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1038           }
1039         } else {
1040            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1041            // but also require sse4.1 mode or higher for instructions it use.
1042           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1043              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1044            }
1045            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1046         }
1047       }
1048       // --AES-CTR ends--
1049     }
1050   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1051     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1052       warning("AES instructions are not available on this CPU");
1053       FLAG_SET_DEFAULT(UseAES, false);
1054     }
1055     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1056       warning("AES intrinsics are not available on this CPU");
1057       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1058     }
1059     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1060       warning("AES-CTR intrinsics are not available on this CPU");
1061       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1062     }
1063   }
1064 
1065   // Use CLMUL instructions if available.
1066   if (supports_clmul()) {
1067     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1068       UseCLMUL = true;
1069     }
1070   } else if (UseCLMUL) {
1071     if (!FLAG_IS_DEFAULT(UseCLMUL))
1072       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1073     FLAG_SET_DEFAULT(UseCLMUL, false);
1074   }
1075 
1076   if (UseCLMUL && (UseSSE > 2)) {
1077     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1078       UseCRC32Intrinsics = true;
1079     }
1080   } else if (UseCRC32Intrinsics) {
1081     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1082       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1083     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1084   }
1085 
1086 #ifdef _LP64
1087   if (supports_avx2()) {
1088     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1089       UseAdler32Intrinsics = true;
1090     }
1091   } else if (UseAdler32Intrinsics) {
1092     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1093       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1094     }
1095     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1096   }
1097 #else
1098   if (UseAdler32Intrinsics) {
1099     warning("Adler32Intrinsics not available on this CPU.");
1100     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1101   }
1102 #endif
1103 
1104   if (supports_sse4_2() && supports_clmul()) {
1105     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1106       UseCRC32CIntrinsics = true;
1107     }
1108   } else if (UseCRC32CIntrinsics) {
1109     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1110       warning("CRC32C intrinsics are not available on this CPU");
1111     }
1112     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1113   }
1114 
1115   // GHASH/GCM intrinsics
1116   if (UseCLMUL && (UseSSE > 2)) {
1117     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1118       UseGHASHIntrinsics = true;
1119     }
1120   } else if (UseGHASHIntrinsics) {
1121     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1122       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1123     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1124   }
1125 
1126   // ChaCha20 Intrinsics
1127   // As long as the system supports AVX as a baseline we can do a
1128   // SIMD-enabled block function.  StubGenerator makes the determination
1129   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1130   // version.
1131   if (UseAVX >= 1) {
1132       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1133           UseChaCha20Intrinsics = true;
1134       }
1135   } else if (UseChaCha20Intrinsics) {
1136       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1137           warning("ChaCha20 intrinsic requires AVX instructions");
1138       }
1139       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1140   }
1141 
1142   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1143   if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) {
1144     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1145       UseBASE64Intrinsics = true;
1146     }
1147   } else if (UseBASE64Intrinsics) {
1148      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1149       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1150     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1151   }
1152 
1153   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1154     if (FLAG_IS_DEFAULT(UseFMA)) {
1155       UseFMA = true;
1156     }
1157   } else if (UseFMA) {
1158     warning("FMA instructions are not available on this CPU");
1159     FLAG_SET_DEFAULT(UseFMA, false);
1160   }
1161 
1162   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1163     UseMD5Intrinsics = true;
1164   }
1165 
1166   if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
1167     if (FLAG_IS_DEFAULT(UseSHA)) {
1168       UseSHA = true;
1169     }
1170   } else if (UseSHA) {
1171     warning("SHA instructions are not available on this CPU");
1172     FLAG_SET_DEFAULT(UseSHA, false);
1173   }
1174 
1175   if (supports_sha() && supports_sse4_1() && UseSHA) {
1176     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1177       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1178     }
1179   } else if (UseSHA1Intrinsics) {
1180     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1181     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1182   }
1183 
1184   if (supports_sse4_1() && UseSHA) {
1185     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1186       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1187     }
1188   } else if (UseSHA256Intrinsics) {
1189     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1190     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1191   }
1192 
1193 #ifdef _LP64
1194   // These are only supported on 64-bit
1195   if (UseSHA && supports_avx2() && supports_bmi2()) {
1196     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1197       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1198     }
1199   } else
1200 #endif
1201   if (UseSHA512Intrinsics) {
1202     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1203     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1204   }
1205 
1206   if (UseSHA3Intrinsics) {
1207     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1208     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1209   }
1210 
1211   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1212     FLAG_SET_DEFAULT(UseSHA, false);
1213   }
1214 
1215   if (!supports_rtm() && UseRTMLocking) {
1216     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1217   }
1218 
1219 #if INCLUDE_RTM_OPT
1220   if (UseRTMLocking) {
1221     if (!CompilerConfig::is_c2_enabled()) {
1222       // Only C2 does RTM locking optimization.
1223       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1224     }
1225     if (is_intel_family_core()) {
1226       if ((_model == CPU_MODEL_HASWELL_E3) ||
1227           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1228           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1229         // currently a collision between SKL and HSW_E3
1230         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1231           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1232                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1233         } else {
1234           warning("UseRTMLocking is only available as experimental option on this platform.");
1235         }
1236       }
1237     }
1238     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1239       // RTM locking should be used only for applications with
1240       // high lock contention. For now we do not use it by default.
1241       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1242     }
1243   } else { // !UseRTMLocking
1244     if (UseRTMForStackLocks) {
1245       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1246         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1247       }
1248       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1249     }
1250     if (UseRTMDeopt) {
1251       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1252     }
1253     if (PrintPreciseRTMLockingStatistics) {
1254       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1255     }
1256   }
1257 #else
1258   if (UseRTMLocking) {
1259     // Only C2 does RTM locking optimization.
1260     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1261   }
1262 #endif
1263 
1264 #ifdef COMPILER2
1265   if (UseFPUForSpilling) {
1266     if (UseSSE < 2) {
1267       // Only supported with SSE2+
1268       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1269     }
1270   }
1271 #endif
1272 
1273 #if COMPILER2_OR_JVMCI
1274   int max_vector_size = 0;
1275   if (UseSSE < 2) {
1276     // Vectors (in XMM) are only supported with SSE2+
1277     // SSE is always 2 on x64.
1278     max_vector_size = 0;
1279   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1280     // 16 byte vectors (in XMM) are supported with SSE2+
1281     max_vector_size = 16;
1282   } else if (UseAVX == 1 || UseAVX == 2) {
1283     // 32 bytes vectors (in YMM) are only supported with AVX+
1284     max_vector_size = 32;
1285   } else if (UseAVX > 2) {
1286     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1287     max_vector_size = 64;
1288   }
1289 
1290 #ifdef _LP64
1291   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1292 #else
1293   int min_vector_size = 0;
1294 #endif
1295 
1296   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1297     if (MaxVectorSize < min_vector_size) {
1298       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1299       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1300     }
1301     if (MaxVectorSize > max_vector_size) {
1302       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1303       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1304     }
1305     if (!is_power_of_2(MaxVectorSize)) {
1306       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1307       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1308     }
1309   } else {
1310     // If default, use highest supported configuration
1311     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1312   }
1313 
1314 #if defined(COMPILER2)
1315   if (FLAG_IS_DEFAULT(SuperWordMaxVectorSize)) {
1316     if (FLAG_IS_DEFAULT(UseAVX) && UseAVX > 2 &&
1317         is_intel_skylake() && _stepping >= 5) {
1318       // Limit auto vectorization to 256 bit (32 byte) by default on Cascade Lake
1319       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MIN2(MaxVectorSize, (intx)32));
1320     } else {
1321       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
1322     }
1323   } else {
1324     if (SuperWordMaxVectorSize > MaxVectorSize) {
1325       warning("SuperWordMaxVectorSize cannot be greater than MaxVectorSize %i", (int) MaxVectorSize);
1326       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
1327     }
1328     if (!is_power_of_2(SuperWordMaxVectorSize)) {
1329       warning("SuperWordMaxVectorSize must be a power of 2, setting to MaxVectorSize: %i", (int) MaxVectorSize);
1330       FLAG_SET_DEFAULT(SuperWordMaxVectorSize, MaxVectorSize);
1331     }
1332   }
1333 #endif
1334 
1335 #if defined(COMPILER2) && defined(ASSERT)
1336   if (MaxVectorSize > 0) {
1337     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1338       tty->print_cr("State of YMM registers after signal handle:");
1339       int nreg = 2 LP64_ONLY(+2);
1340       const char* ymm_name[4] = {"0", "7", "8", "15"};
1341       for (int i = 0; i < nreg; i++) {
1342         tty->print("YMM%s:", ymm_name[i]);
1343         for (int j = 7; j >=0; j--) {
1344           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1345         }
1346         tty->cr();
1347       }
1348     }
1349   }
1350 #endif // COMPILER2 && ASSERT
1351 
1352 #ifdef _LP64
1353   if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
1354     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1355       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1356     }
1357   } else
1358 #endif
1359   if (UsePoly1305Intrinsics) {
1360     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1361     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1362   }
1363 
1364 #ifdef _LP64
1365   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1366     UseMultiplyToLenIntrinsic = true;
1367   }
1368   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1369     UseSquareToLenIntrinsic = true;
1370   }
1371   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1372     UseMulAddIntrinsic = true;
1373   }
1374   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1375     UseMontgomeryMultiplyIntrinsic = true;
1376   }
1377   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1378     UseMontgomerySquareIntrinsic = true;
1379   }
1380 #else
1381   if (UseMultiplyToLenIntrinsic) {
1382     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1383       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1384     }
1385     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1386   }
1387   if (UseMontgomeryMultiplyIntrinsic) {
1388     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1389       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1390     }
1391     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1392   }
1393   if (UseMontgomerySquareIntrinsic) {
1394     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1395       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1396     }
1397     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1398   }
1399   if (UseSquareToLenIntrinsic) {
1400     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1401       warning("squareToLen intrinsic is not available in 32-bit VM");
1402     }
1403     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1404   }
1405   if (UseMulAddIntrinsic) {
1406     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1407       warning("mulAdd intrinsic is not available in 32-bit VM");
1408     }
1409     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1410   }
1411 #endif // _LP64
1412 #endif // COMPILER2_OR_JVMCI
1413 
1414   // On new cpus instructions which update whole XMM register should be used
1415   // to prevent partial register stall due to dependencies on high half.
1416   //
1417   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1418   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1419   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1420   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1421 
1422 
1423   if (is_zx()) { // ZX cpus specific settings
1424     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1425       UseStoreImmI16 = false; // don't use it on ZX cpus
1426     }
1427     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1428       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1429         // Use it on all ZX cpus
1430         UseAddressNop = true;
1431       }
1432     }
1433     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1434       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1435     }
1436     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1437       if (supports_sse3()) {
1438         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1439       } else {
1440         UseXmmRegToRegMoveAll = false;
1441       }
1442     }
1443     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1444 #ifdef COMPILER2
1445       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1446         // For new ZX cpus do the next optimization:
1447         // don't align the beginning of a loop if there are enough instructions
1448         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1449         // in current fetch line (OptoLoopAlignment) or the padding
1450         // is big (> MaxLoopPad).
1451         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1452         // generated NOP instructions. 11 is the largest size of one
1453         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1454         MaxLoopPad = 11;
1455       }
1456 #endif // COMPILER2
1457       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1458         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1459       }
1460       if (supports_sse4_2()) { // new ZX cpus
1461         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1462           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1463         }
1464       }
1465       if (supports_sse4_2()) {
1466         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1467           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1468         }
1469       } else {
1470         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1471           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1472         }
1473         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1474       }
1475     }
1476 
1477     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1478       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1479     }
1480   }
1481 
1482   if (is_amd_family()) { // AMD cpus specific settings
1483     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1484       // Use it on new AMD cpus starting from Opteron.
1485       UseAddressNop = true;
1486     }
1487     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1488       // Use it on new AMD cpus starting from Opteron.
1489       UseNewLongLShift = true;
1490     }
1491     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1492       if (supports_sse4a()) {
1493         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1494       } else {
1495         UseXmmLoadAndClearUpper = false;
1496       }
1497     }
1498     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1499       if (supports_sse4a()) {
1500         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1501       } else {
1502         UseXmmRegToRegMoveAll = false;
1503       }
1504     }
1505     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1506       if (supports_sse4a()) {
1507         UseXmmI2F = true;
1508       } else {
1509         UseXmmI2F = false;
1510       }
1511     }
1512     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1513       if (supports_sse4a()) {
1514         UseXmmI2D = true;
1515       } else {
1516         UseXmmI2D = false;
1517       }
1518     }
1519     if (supports_sse4_2()) {
1520       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1521         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1522       }
1523     } else {
1524       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1525         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1526       }
1527       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1528     }
1529 
1530     // some defaults for AMD family 15h
1531     if (cpu_family() == 0x15) {
1532       // On family 15h processors default is no sw prefetch
1533       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1534         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1535       }
1536       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1537       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1538         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1539       }
1540       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1541       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1542         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1543       }
1544       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1545         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1546       }
1547     }
1548 
1549 #ifdef COMPILER2
1550     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1551       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1552       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1553     }
1554 #endif // COMPILER2
1555 
1556     // Some defaults for AMD family >= 17h && Hygon family 18h
1557     if (cpu_family() >= 0x17) {
1558       // On family >=17h processors use XMM and UnalignedLoadStores
1559       // for Array Copy
1560       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1561         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1562       }
1563       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1564         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1565       }
1566 #ifdef COMPILER2
1567       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1568         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1569       }
1570 #endif
1571     }
1572   }
1573 
1574   if (is_intel()) { // Intel cpus specific settings
1575     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1576       UseStoreImmI16 = false; // don't use it on Intel cpus
1577     }
1578     if (cpu_family() == 6 || cpu_family() == 15) {
1579       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1580         // Use it on all Intel cpus starting from PentiumPro
1581         UseAddressNop = true;
1582       }
1583     }
1584     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1585       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1586     }
1587     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1588       if (supports_sse3()) {
1589         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1590       } else {
1591         UseXmmRegToRegMoveAll = false;
1592       }
1593     }
1594     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1595 #ifdef COMPILER2
1596       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1597         // For new Intel cpus do the next optimization:
1598         // don't align the beginning of a loop if there are enough instructions
1599         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1600         // in current fetch line (OptoLoopAlignment) or the padding
1601         // is big (> MaxLoopPad).
1602         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1603         // generated NOP instructions. 11 is the largest size of one
1604         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1605         MaxLoopPad = 11;
1606       }
1607 #endif // COMPILER2
1608 
1609       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1610         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1611       }
1612       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1613         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1614           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1615         }
1616       }
1617       if (supports_sse4_2()) {
1618         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1619           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1620         }
1621       } else {
1622         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1623           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1624         }
1625         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1626       }
1627     }
1628     if (is_atom_family() || is_knights_family()) {
1629 #ifdef COMPILER2
1630       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1631         OptoScheduling = true;
1632       }
1633 #endif
1634       if (supports_sse4_2()) { // Silvermont
1635         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1636           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1637         }
1638       }
1639       if (FLAG_IS_DEFAULT(UseIncDec)) {
1640         FLAG_SET_DEFAULT(UseIncDec, false);
1641       }
1642     }
1643     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1644       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1645     }
1646 #ifdef COMPILER2
1647     if (UseAVX > 2) {
1648       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1649           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1650            ArrayOperationPartialInlineSize != 0 &&
1651            ArrayOperationPartialInlineSize != 16 &&
1652            ArrayOperationPartialInlineSize != 32 &&
1653            ArrayOperationPartialInlineSize != 64)) {
1654         int inline_size = 0;
1655         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1656           inline_size = 64;
1657         } else if (MaxVectorSize >= 32) {
1658           inline_size = 32;
1659         } else if (MaxVectorSize >= 16) {
1660           inline_size = 16;
1661         }
1662         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1663           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1664         }
1665         ArrayOperationPartialInlineSize = inline_size;
1666       }
1667 
1668       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1669         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1670         if (ArrayOperationPartialInlineSize) {
1671           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1672         } else {
1673           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1674         }
1675       }
1676     }
1677 #endif
1678   }
1679 
1680 #ifdef COMPILER2
1681   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1682     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1683       OptimizeFill = false;
1684     }
1685   }
1686 #endif
1687 
1688 #ifdef _LP64
1689   if (UseSSE42Intrinsics) {
1690     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1691       UseVectorizedMismatchIntrinsic = true;
1692     }
1693   } else if (UseVectorizedMismatchIntrinsic) {
1694     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1695       warning("vectorizedMismatch intrinsics are not available on this CPU");
1696     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1697   }
1698 #else
1699   if (UseVectorizedMismatchIntrinsic) {
1700     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1701       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1702     }
1703     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1704   }
1705 #endif // _LP64
1706 
1707   // Use count leading zeros count instruction if available.
1708   if (supports_lzcnt()) {
1709     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1710       UseCountLeadingZerosInstruction = true;
1711     }
1712    } else if (UseCountLeadingZerosInstruction) {
1713     warning("lzcnt instruction is not available on this CPU");
1714     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1715   }
1716 
1717   // Use count trailing zeros instruction if available
1718   if (supports_bmi1()) {
1719     // tzcnt does not require VEX prefix
1720     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1721       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1722         // Don't use tzcnt if BMI1 is switched off on command line.
1723         UseCountTrailingZerosInstruction = false;
1724       } else {
1725         UseCountTrailingZerosInstruction = true;
1726       }
1727     }
1728   } else if (UseCountTrailingZerosInstruction) {
1729     warning("tzcnt instruction is not available on this CPU");
1730     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1731   }
1732 
1733   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1734   // VEX prefix is generated only when AVX > 0.
1735   if (supports_bmi1() && supports_avx()) {
1736     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1737       UseBMI1Instructions = true;
1738     }
1739   } else if (UseBMI1Instructions) {
1740     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1741     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1742   }
1743 
1744   if (supports_bmi2() && supports_avx()) {
1745     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1746       UseBMI2Instructions = true;
1747     }
1748   } else if (UseBMI2Instructions) {
1749     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1750     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1751   }
1752 
1753   // Use population count instruction if available.
1754   if (supports_popcnt()) {
1755     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1756       UsePopCountInstruction = true;
1757     }
1758   } else if (UsePopCountInstruction) {
1759     warning("POPCNT instruction is not available on this CPU");
1760     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1761   }
1762 
1763   // Use fast-string operations if available.
1764   if (supports_erms()) {
1765     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1766       UseFastStosb = true;
1767     }
1768   } else if (UseFastStosb) {
1769     warning("fast-string operations are not available on this CPU");
1770     FLAG_SET_DEFAULT(UseFastStosb, false);
1771   }
1772 
1773   // For AMD Processors use XMM/YMM MOVDQU instructions
1774   // for Object Initialization as default
1775   if (is_amd() && cpu_family() >= 0x19) {
1776     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1777       UseFastStosb = false;
1778     }
1779   }
1780 
1781 #ifdef COMPILER2
1782   if (is_intel() && MaxVectorSize > 16) {
1783     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1784       UseFastStosb = false;
1785     }
1786   }
1787 #endif
1788 
1789   // Use XMM/YMM MOVDQU instruction for Object Initialization
1790   if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1791     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1792       UseXMMForObjInit = true;
1793     }
1794   } else if (UseXMMForObjInit) {
1795     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1796     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1797   }
1798 
1799 #ifdef COMPILER2
1800   if (FLAG_IS_DEFAULT(AlignVector)) {
1801     // Modern processors allow misaligned memory operations for vectors.
1802     AlignVector = !UseUnalignedLoadStores;
1803   }
1804 #endif // COMPILER2
1805 
1806   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1807     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1808       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1809     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1810       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1811     }
1812   }
1813 
1814   // Allocation prefetch settings
1815   intx cache_line_size = prefetch_data_size();
1816   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1817       (cache_line_size > AllocatePrefetchStepSize)) {
1818     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1819   }
1820 
1821   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1822     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1823     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1824       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1825     }
1826     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1827   }
1828 
1829   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1830     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1831     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1832   }
1833 
1834   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1835     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1836         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1837       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1838     }
1839 #ifdef COMPILER2
1840     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1841       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1842     }
1843 #endif
1844   }
1845 
1846   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1847 #ifdef COMPILER2
1848     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1849       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1850     }
1851 #endif
1852   }
1853 
1854 #ifdef _LP64
1855   // Prefetch settings
1856 
1857   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1858   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1859   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1860   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1861 
1862   // gc copy/scan is disabled if prefetchw isn't supported, because
1863   // Prefetch::write emits an inlined prefetchw on Linux.
1864   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1865   // The used prefetcht0 instruction works for both amd64 and em64t.
1866 
1867   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1868     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1869   }
1870   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1871     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1872   }
1873 #endif
1874 
1875   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1876      (cache_line_size > ContendedPaddingWidth))
1877      ContendedPaddingWidth = cache_line_size;
1878 
1879   // This machine allows unaligned memory accesses
1880   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1881     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1882   }
1883 
1884 #ifndef PRODUCT
1885   if (log_is_enabled(Info, os, cpu)) {
1886     LogStream ls(Log(os, cpu)::info());
1887     outputStream* log = &ls;
1888     log->print_cr("Logical CPUs per core: %u",
1889                   logical_processors_per_package());
1890     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1891     log->print("UseSSE=%d", UseSSE);
1892     if (UseAVX > 0) {
1893       log->print("  UseAVX=%d", UseAVX);
1894     }
1895     if (UseAES) {
1896       log->print("  UseAES=1");
1897     }
1898 #ifdef COMPILER2
1899     if (MaxVectorSize > 0) {
1900       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1901     }
1902 #endif
1903     log->cr();
1904     log->print("Allocation");
1905     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1906       log->print_cr(": no prefetching");
1907     } else {
1908       log->print(" prefetching: ");
1909       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1910         log->print("PREFETCHW");
1911       } else if (UseSSE >= 1) {
1912         if (AllocatePrefetchInstr == 0) {
1913           log->print("PREFETCHNTA");
1914         } else if (AllocatePrefetchInstr == 1) {
1915           log->print("PREFETCHT0");
1916         } else if (AllocatePrefetchInstr == 2) {
1917           log->print("PREFETCHT2");
1918         } else if (AllocatePrefetchInstr == 3) {
1919           log->print("PREFETCHW");
1920         }
1921       }
1922       if (AllocatePrefetchLines > 1) {
1923         log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1924       } else {
1925         log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1926       }
1927     }
1928 
1929     if (PrefetchCopyIntervalInBytes > 0) {
1930       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1931     }
1932     if (PrefetchScanIntervalInBytes > 0) {
1933       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1934     }
1935     if (ContendedPaddingWidth > 0) {
1936       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1937     }
1938   }
1939 #endif // !PRODUCT
1940   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1941       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1942   }
1943   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1944       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1945   }
1946 }
1947 
1948 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1949   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1950   if (vrt == XenHVM) {
1951     st->print_cr("Xen hardware-assisted virtualization detected");
1952   } else if (vrt == KVM) {
1953     st->print_cr("KVM virtualization detected");
1954   } else if (vrt == VMWare) {
1955     st->print_cr("VMWare virtualization detected");
1956     VirtualizationSupport::print_virtualization_info(st);
1957   } else if (vrt == HyperV) {
1958     st->print_cr("Hyper-V virtualization detected");
1959   } else if (vrt == HyperVRole) {
1960     st->print_cr("Hyper-V role detected");
1961   }
1962 }
1963 
1964 bool VM_Version::compute_has_intel_jcc_erratum() {
1965   if (!is_intel_family_core()) {
1966     // Only Intel CPUs are affected.
1967     return false;
1968   }
1969   // The following table of affected CPUs is based on the following document released by Intel:
1970   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1971   switch (_model) {
1972   case 0x8E:
1973     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1974     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1975     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1976     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1977     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1978     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1979     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1980     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1981     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1982     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1983   case 0x4E:
1984     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1985     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1986     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1987     return _stepping == 0x3;
1988   case 0x55:
1989     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1990     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1991     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1992     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1993     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1994     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1995     return _stepping == 0x4 || _stepping == 0x7;
1996   case 0x5E:
1997     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1998     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1999     return _stepping == 0x3;
2000   case 0x9E:
2001     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2002     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2003     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2004     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2005     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2006     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2007     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2008     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2009     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2010     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2011     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2012     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2013     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2014     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2015     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2016   case 0xA5:
2017     // Not in Intel documentation.
2018     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2019     return true;
2020   case 0xA6:
2021     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2022     return _stepping == 0x0;
2023   case 0xAE:
2024     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2025     return _stepping == 0xA;
2026   default:
2027     // If we are running on another intel machine not recognized in the table, we are okay.
2028     return false;
2029   }
2030 }
2031 
2032 // On Xen, the cpuid instruction returns
2033 //  eax / registers[0]: Version of Xen
2034 //  ebx / registers[1]: chars 'XenV'
2035 //  ecx / registers[2]: chars 'MMXe'
2036 //  edx / registers[3]: chars 'nVMM'
2037 //
2038 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2039 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2040 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2041 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2042 //
2043 // more information :
2044 // https://kb.vmware.com/s/article/1009458
2045 //
2046 void VM_Version::check_virtualizations() {
2047   uint32_t registers[4] = {0};
2048   char signature[13] = {0};
2049 
2050   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2051   // from 0x40000000 until 0x40010000.
2052   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2053   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2054     detect_virt_stub(leaf, registers);
2055     memcpy(signature, &registers[1], 12);
2056 
2057     if (strncmp("VMwareVMware", signature, 12) == 0) {
2058       Abstract_VM_Version::_detected_virtualization = VMWare;
2059       // check for extended metrics from guestlib
2060       VirtualizationSupport::initialize();
2061     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2062       Abstract_VM_Version::_detected_virtualization = HyperV;
2063 #ifdef _WINDOWS
2064       // CPUID leaf 0x40000007 is available to the root partition only.
2065       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2066       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2067       detect_virt_stub(0x40000007, registers);
2068       if ((registers[0] != 0x0) ||
2069           (registers[1] != 0x0) ||
2070           (registers[2] != 0x0) ||
2071           (registers[3] != 0x0)) {
2072         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2073       }
2074 #endif
2075     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2076       Abstract_VM_Version::_detected_virtualization = KVM;
2077     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2078       Abstract_VM_Version::_detected_virtualization = XenHVM;
2079     }
2080   }
2081 }
2082 
2083 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2084 // for implementing the array copy and clear operations.
2085 // The Intel platforms that supports the serialize instruction
2086 // has improved implementation of 64-byte load/stores and so the default
2087 // threshold is set to 0 for these platforms.
2088 int VM_Version::avx3_threshold() {
2089   return (is_intel_family_core() &&
2090           supports_serialize() &&
2091           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2092 }
2093 
2094 static bool _vm_version_initialized = false;
2095 
2096 void VM_Version::initialize() {
2097   ResourceMark rm;
2098   // Making this stub must be FIRST use of assembler
2099   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2100   if (stub_blob == NULL) {
2101     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2102   }
2103   CodeBuffer c(stub_blob);
2104   VM_Version_StubGenerator g(&c);
2105 
2106   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2107                                      g.generate_get_cpu_info());
2108   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2109                                      g.generate_detect_virt());
2110 
2111   get_processor_features();
2112 
2113   LP64_ONLY(Assembler::precompute_instructions();)
2114 
2115   if (VM_Version::supports_hv()) { // Supports hypervisor
2116     check_virtualizations();
2117   }
2118   _vm_version_initialized = true;
2119 }
2120 
2121 typedef enum {
2122    CPU_FAMILY_8086_8088  = 0,
2123    CPU_FAMILY_INTEL_286  = 2,
2124    CPU_FAMILY_INTEL_386  = 3,
2125    CPU_FAMILY_INTEL_486  = 4,
2126    CPU_FAMILY_PENTIUM    = 5,
2127    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2128    CPU_FAMILY_PENTIUM_4  = 0xF
2129 } FamilyFlag;
2130 
2131 typedef enum {
2132   RDTSCP_FLAG  = 0x08000000, // bit 27
2133   INTEL64_FLAG = 0x20000000  // bit 29
2134 } _featureExtendedEdxFlag;
2135 
2136 typedef enum {
2137    FPU_FLAG     = 0x00000001,
2138    VME_FLAG     = 0x00000002,
2139    DE_FLAG      = 0x00000004,
2140    PSE_FLAG     = 0x00000008,
2141    TSC_FLAG     = 0x00000010,
2142    MSR_FLAG     = 0x00000020,
2143    PAE_FLAG     = 0x00000040,
2144    MCE_FLAG     = 0x00000080,
2145    CX8_FLAG     = 0x00000100,
2146    APIC_FLAG    = 0x00000200,
2147    SEP_FLAG     = 0x00000800,
2148    MTRR_FLAG    = 0x00001000,
2149    PGE_FLAG     = 0x00002000,
2150    MCA_FLAG     = 0x00004000,
2151    CMOV_FLAG    = 0x00008000,
2152    PAT_FLAG     = 0x00010000,
2153    PSE36_FLAG   = 0x00020000,
2154    PSNUM_FLAG   = 0x00040000,
2155    CLFLUSH_FLAG = 0x00080000,
2156    DTS_FLAG     = 0x00200000,
2157    ACPI_FLAG    = 0x00400000,
2158    MMX_FLAG     = 0x00800000,
2159    FXSR_FLAG    = 0x01000000,
2160    SSE_FLAG     = 0x02000000,
2161    SSE2_FLAG    = 0x04000000,
2162    SS_FLAG      = 0x08000000,
2163    HTT_FLAG     = 0x10000000,
2164    TM_FLAG      = 0x20000000
2165 } FeatureEdxFlag;
2166 
2167 static BufferBlob* cpuid_brand_string_stub_blob;
2168 static const int   cpuid_brand_string_stub_size = 550;
2169 
2170 extern "C" {
2171   typedef void (*getCPUIDBrandString_stub_t)(void*);
2172 }
2173 
2174 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL;
2175 
2176 // VM_Version statics
2177 enum {
2178   ExtendedFamilyIdLength_INTEL = 16,
2179   ExtendedFamilyIdLength_AMD   = 24
2180 };
2181 
2182 const size_t VENDOR_LENGTH = 13;
2183 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2184 static char* _cpu_brand_string = NULL;
2185 static int64_t _max_qualified_cpu_frequency = 0;
2186 
2187 static int _no_of_threads = 0;
2188 static int _no_of_cores = 0;
2189 
2190 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2191   "8086/8088",
2192   "",
2193   "286",
2194   "386",
2195   "486",
2196   "Pentium",
2197   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2198   "",
2199   "",
2200   "",
2201   "",
2202   "",
2203   "",
2204   "",
2205   "",
2206   "Pentium 4"
2207 };
2208 
2209 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2210   "",
2211   "",
2212   "",
2213   "",
2214   "5x86",
2215   "K5/K6",
2216   "Athlon/AthlonXP",
2217   "",
2218   "",
2219   "",
2220   "",
2221   "",
2222   "",
2223   "",
2224   "",
2225   "Opteron/Athlon64",
2226   "Opteron QC/Phenom",  // Barcelona et.al.
2227   "",
2228   "",
2229   "",
2230   "",
2231   "",
2232   "",
2233   "Zen"
2234 };
2235 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2236 // September 2013, Vol 3C Table 35-1
2237 const char* const _model_id_pentium_pro[] = {
2238   "",
2239   "Pentium Pro",
2240   "",
2241   "Pentium II model 3",
2242   "",
2243   "Pentium II model 5/Xeon/Celeron",
2244   "Celeron",
2245   "Pentium III/Pentium III Xeon",
2246   "Pentium III/Pentium III Xeon",
2247   "Pentium M model 9",    // Yonah
2248   "Pentium III, model A",
2249   "Pentium III, model B",
2250   "",
2251   "Pentium M model D",    // Dothan
2252   "",
2253   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2254   "",
2255   "",
2256   "",
2257   "",
2258   "",
2259   "",
2260   "Celeron",              // 0x16 Celeron 65nm
2261   "Core 2",               // 0x17 Penryn / Harpertown
2262   "",
2263   "",
2264   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2265   "Atom",                 // 0x1B Z5xx series Silverthorn
2266   "",
2267   "Core 2",               // 0x1D Dunnington (6-core)
2268   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2269   "",
2270   "",
2271   "",
2272   "",
2273   "",
2274   "",
2275   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2276   "",
2277   "",
2278   "",                     // 0x28
2279   "",
2280   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2281   "",
2282   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2283   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2284   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2285   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2286   "",
2287   "",
2288   "",
2289   "",
2290   "",
2291   "",
2292   "",
2293   "",
2294   "",
2295   "",
2296   "Ivy Bridge",           // 0x3a
2297   "",
2298   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2299   "",                     // 0x3d "Next Generation Intel Core Processor"
2300   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2301   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2302   "",
2303   "",
2304   "",
2305   "",
2306   "",
2307   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2308   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2309   NULL
2310 };
2311 
2312 /* Brand ID is for back compatibility
2313  * Newer CPUs uses the extended brand string */
2314 const char* const _brand_id[] = {
2315   "",
2316   "Celeron processor",
2317   "Pentium III processor",
2318   "Intel Pentium III Xeon processor",
2319   "",
2320   "",
2321   "",
2322   "",
2323   "Intel Pentium 4 processor",
2324   NULL
2325 };
2326 
2327 
2328 const char* const _feature_edx_id[] = {
2329   "On-Chip FPU",
2330   "Virtual Mode Extensions",
2331   "Debugging Extensions",
2332   "Page Size Extensions",
2333   "Time Stamp Counter",
2334   "Model Specific Registers",
2335   "Physical Address Extension",
2336   "Machine Check Exceptions",
2337   "CMPXCHG8B Instruction",
2338   "On-Chip APIC",
2339   "",
2340   "Fast System Call",
2341   "Memory Type Range Registers",
2342   "Page Global Enable",
2343   "Machine Check Architecture",
2344   "Conditional Mov Instruction",
2345   "Page Attribute Table",
2346   "36-bit Page Size Extension",
2347   "Processor Serial Number",
2348   "CLFLUSH Instruction",
2349   "",
2350   "Debug Trace Store feature",
2351   "ACPI registers in MSR space",
2352   "Intel Architecture MMX Technology",
2353   "Fast Float Point Save and Restore",
2354   "Streaming SIMD extensions",
2355   "Streaming SIMD extensions 2",
2356   "Self-Snoop",
2357   "Hyper Threading",
2358   "Thermal Monitor",
2359   "",
2360   "Pending Break Enable"
2361 };
2362 
2363 const char* const _feature_extended_edx_id[] = {
2364   "",
2365   "",
2366   "",
2367   "",
2368   "",
2369   "",
2370   "",
2371   "",
2372   "",
2373   "",
2374   "",
2375   "SYSCALL/SYSRET",
2376   "",
2377   "",
2378   "",
2379   "",
2380   "",
2381   "",
2382   "",
2383   "",
2384   "Execute Disable Bit",
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "RDTSCP",
2392   "",
2393   "Intel 64 Architecture",
2394   "",
2395   ""
2396 };
2397 
2398 const char* const _feature_ecx_id[] = {
2399   "Streaming SIMD Extensions 3",
2400   "PCLMULQDQ",
2401   "64-bit DS Area",
2402   "MONITOR/MWAIT instructions",
2403   "CPL Qualified Debug Store",
2404   "Virtual Machine Extensions",
2405   "Safer Mode Extensions",
2406   "Enhanced Intel SpeedStep technology",
2407   "Thermal Monitor 2",
2408   "Supplemental Streaming SIMD Extensions 3",
2409   "L1 Context ID",
2410   "",
2411   "Fused Multiply-Add",
2412   "CMPXCHG16B",
2413   "xTPR Update Control",
2414   "Perfmon and Debug Capability",
2415   "",
2416   "Process-context identifiers",
2417   "Direct Cache Access",
2418   "Streaming SIMD extensions 4.1",
2419   "Streaming SIMD extensions 4.2",
2420   "x2APIC",
2421   "MOVBE",
2422   "Popcount instruction",
2423   "TSC-Deadline",
2424   "AESNI",
2425   "XSAVE",
2426   "OSXSAVE",
2427   "AVX",
2428   "F16C",
2429   "RDRAND",
2430   ""
2431 };
2432 
2433 const char* const _feature_extended_ecx_id[] = {
2434   "LAHF/SAHF instruction support",
2435   "Core multi-processor legacy mode",
2436   "",
2437   "",
2438   "",
2439   "Advanced Bit Manipulations: LZCNT",
2440   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2441   "Misaligned SSE mode",
2442   "",
2443   "",
2444   "",
2445   "",
2446   "",
2447   "",
2448   "",
2449   "",
2450   "",
2451   "",
2452   "",
2453   "",
2454   "",
2455   "",
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   ""
2466 };
2467 
2468 void VM_Version::initialize_tsc(void) {
2469   ResourceMark rm;
2470 
2471   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2472   if (cpuid_brand_string_stub_blob == NULL) {
2473     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2474   }
2475   CodeBuffer c(cpuid_brand_string_stub_blob);
2476   VM_Version_StubGenerator g(&c);
2477   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2478                                    g.generate_getCPUIDBrandString());
2479 }
2480 
2481 const char* VM_Version::cpu_model_description(void) {
2482   uint32_t cpu_family = extended_cpu_family();
2483   uint32_t cpu_model = extended_cpu_model();
2484   const char* model = NULL;
2485 
2486   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2487     for (uint32_t i = 0; i <= cpu_model; i++) {
2488       model = _model_id_pentium_pro[i];
2489       if (model == NULL) {
2490         break;
2491       }
2492     }
2493   }
2494   return model;
2495 }
2496 
2497 const char* VM_Version::cpu_brand_string(void) {
2498   if (_cpu_brand_string == NULL) {
2499     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2500     if (NULL == _cpu_brand_string) {
2501       return NULL;
2502     }
2503     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2504     if (ret_val != OS_OK) {
2505       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2506       _cpu_brand_string = NULL;
2507     }
2508   }
2509   return _cpu_brand_string;
2510 }
2511 
2512 const char* VM_Version::cpu_brand(void) {
2513   const char*  brand  = NULL;
2514 
2515   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2516     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2517     brand = _brand_id[0];
2518     for (int i = 0; brand != NULL && i <= brand_num; i += 1) {
2519       brand = _brand_id[i];
2520     }
2521   }
2522   return brand;
2523 }
2524 
2525 bool VM_Version::cpu_is_em64t(void) {
2526   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2527 }
2528 
2529 bool VM_Version::is_netburst(void) {
2530   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2531 }
2532 
2533 bool VM_Version::supports_tscinv_ext(void) {
2534   if (!supports_tscinv_bit()) {
2535     return false;
2536   }
2537 
2538   if (is_intel()) {
2539     return true;
2540   }
2541 
2542   if (is_amd()) {
2543     return !is_amd_Barcelona();
2544   }
2545 
2546   if (is_hygon()) {
2547     return true;
2548   }
2549 
2550   return false;
2551 }
2552 
2553 void VM_Version::resolve_cpu_information_details(void) {
2554 
2555   // in future we want to base this information on proper cpu
2556   // and cache topology enumeration such as:
2557   // Intel 64 Architecture Processor Topology Enumeration
2558   // which supports system cpu and cache topology enumeration
2559   // either using 2xAPICIDs or initial APICIDs
2560 
2561   // currently only rough cpu information estimates
2562   // which will not necessarily reflect the exact configuration of the system
2563 
2564   // this is the number of logical hardware threads
2565   // visible to the operating system
2566   _no_of_threads = os::processor_count();
2567 
2568   // find out number of threads per cpu package
2569   int threads_per_package = threads_per_core() * cores_per_cpu();
2570 
2571   // use amount of threads visible to the process in order to guess number of sockets
2572   _no_of_sockets = _no_of_threads / threads_per_package;
2573 
2574   // process might only see a subset of the total number of threads
2575   // from a single processor package. Virtualization/resource management for example.
2576   // If so then just write a hard 1 as num of pkgs.
2577   if (0 == _no_of_sockets) {
2578     _no_of_sockets = 1;
2579   }
2580 
2581   // estimate the number of cores
2582   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2583 }
2584 
2585 
2586 const char* VM_Version::cpu_family_description(void) {
2587   int cpu_family_id = extended_cpu_family();
2588   if (is_amd()) {
2589     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2590       return _family_id_amd[cpu_family_id];
2591     }
2592   }
2593   if (is_intel()) {
2594     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2595       return cpu_model_description();
2596     }
2597     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2598       return _family_id_intel[cpu_family_id];
2599     }
2600   }
2601   if (is_hygon()) {
2602     return "Dhyana";
2603   }
2604   return "Unknown x86";
2605 }
2606 
2607 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2608   assert(buf != NULL, "buffer is NULL!");
2609   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2610 
2611   const char* cpu_type = NULL;
2612   const char* x64 = NULL;
2613 
2614   if (is_intel()) {
2615     cpu_type = "Intel";
2616     x64 = cpu_is_em64t() ? " Intel64" : "";
2617   } else if (is_amd()) {
2618     cpu_type = "AMD";
2619     x64 = cpu_is_em64t() ? " AMD64" : "";
2620   } else if (is_hygon()) {
2621     cpu_type = "Hygon";
2622     x64 = cpu_is_em64t() ? " AMD64" : "";
2623   } else {
2624     cpu_type = "Unknown x86";
2625     x64 = cpu_is_em64t() ? " x86_64" : "";
2626   }
2627 
2628   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2629     cpu_type,
2630     cpu_family_description(),
2631     supports_ht() ? " (HT)" : "",
2632     supports_sse3() ? " SSE3" : "",
2633     supports_ssse3() ? " SSSE3" : "",
2634     supports_sse4_1() ? " SSE4.1" : "",
2635     supports_sse4_2() ? " SSE4.2" : "",
2636     supports_sse4a() ? " SSE4A" : "",
2637     is_netburst() ? " Netburst" : "",
2638     is_intel_family_core() ? " Core" : "",
2639     x64);
2640 
2641   return OS_OK;
2642 }
2643 
2644 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2645   assert(buf != NULL, "buffer is NULL!");
2646   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2647   assert(getCPUIDBrandString_stub != NULL, "not initialized");
2648 
2649   // invoke newly generated asm code to fetch CPU Brand String
2650   getCPUIDBrandString_stub(&_cpuid_info);
2651 
2652   // fetch results into buffer
2653   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2654   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2655   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2656   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2657   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2658   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2659   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2660   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2661   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2662   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2663   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2664   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2665 
2666   return OS_OK;
2667 }
2668 
2669 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2670   guarantee(buf != NULL, "buffer is NULL!");
2671   guarantee(buf_len > 0, "buffer len not enough!");
2672 
2673   unsigned int flag = 0;
2674   unsigned int fi = 0;
2675   size_t       written = 0;
2676   const char*  prefix = "";
2677 
2678 #define WRITE_TO_BUF(string)                                                          \
2679   {                                                                                   \
2680     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2681     if (res < 0) {                                                                    \
2682       return buf_len - 1;                                                             \
2683     }                                                                                 \
2684     written += res;                                                                   \
2685     if (prefix[0] == '\0') {                                                          \
2686       prefix = ", ";                                                                  \
2687     }                                                                                 \
2688   }
2689 
2690   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2691     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2692       continue; /* no hyperthreading */
2693     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2694       continue; /* no fast system call */
2695     }
2696     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2697       WRITE_TO_BUF(_feature_edx_id[fi]);
2698     }
2699   }
2700 
2701   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2702     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2703       WRITE_TO_BUF(_feature_ecx_id[fi]);
2704     }
2705   }
2706 
2707   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2708     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2709       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2710     }
2711   }
2712 
2713   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2714     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2715       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2716     }
2717   }
2718 
2719   if (supports_tscinv_bit()) {
2720       WRITE_TO_BUF("Invariant TSC");
2721   }
2722 
2723   return written;
2724 }
2725 
2726 /**
2727  * Write a detailed description of the cpu to a given buffer, including
2728  * feature set.
2729  */
2730 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2731   assert(buf != NULL, "buffer is NULL!");
2732   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2733 
2734   static const char* unknown = "<unknown>";
2735   char               vendor_id[VENDOR_LENGTH];
2736   const char*        family = NULL;
2737   const char*        model = NULL;
2738   const char*        brand = NULL;
2739   int                outputLen = 0;
2740 
2741   family = cpu_family_description();
2742   if (family == NULL) {
2743     family = unknown;
2744   }
2745 
2746   model = cpu_model_description();
2747   if (model == NULL) {
2748     model = unknown;
2749   }
2750 
2751   brand = cpu_brand_string();
2752 
2753   if (brand == NULL) {
2754     brand = cpu_brand();
2755     if (brand == NULL) {
2756       brand = unknown;
2757     }
2758   }
2759 
2760   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2761   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2762   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2763   vendor_id[VENDOR_LENGTH-1] = '\0';
2764 
2765   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2766     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2767     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2768     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2769     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2770     "Supports: ",
2771     brand,
2772     vendor_id,
2773     family,
2774     extended_cpu_family(),
2775     model,
2776     extended_cpu_model(),
2777     cpu_stepping(),
2778     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2779     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2780     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2781     _cpuid_info.std_cpuid1_eax.value,
2782     _cpuid_info.std_cpuid1_ebx.value,
2783     _cpuid_info.std_cpuid1_ecx.value,
2784     _cpuid_info.std_cpuid1_edx.value,
2785     _cpuid_info.ext_cpuid1_eax,
2786     _cpuid_info.ext_cpuid1_ebx,
2787     _cpuid_info.ext_cpuid1_ecx,
2788     _cpuid_info.ext_cpuid1_edx);
2789 
2790   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2791     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2792     return OS_ERR;
2793   }
2794 
2795   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2796 
2797   return OS_OK;
2798 }
2799 
2800 
2801 // Fill in Abstract_VM_Version statics
2802 void VM_Version::initialize_cpu_information() {
2803   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2804   assert(!_initialized, "shouldn't be initialized yet");
2805   resolve_cpu_information_details();
2806 
2807   // initialize cpu_name and cpu_desc
2808   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2809   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2810   _initialized = true;
2811 }
2812 
2813 /**
2814  *  For information about extracting the frequency from the cpu brand string, please see:
2815  *
2816  *    Intel Processor Identification and the CPUID Instruction
2817  *    Application Note 485
2818  *    May 2012
2819  *
2820  * The return value is the frequency in Hz.
2821  */
2822 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2823   const char* const brand_string = cpu_brand_string();
2824   if (brand_string == NULL) {
2825     return 0;
2826   }
2827   const int64_t MEGA = 1000000;
2828   int64_t multiplier = 0;
2829   int64_t frequency = 0;
2830   uint8_t idx = 0;
2831   // The brand string buffer is at most 48 bytes.
2832   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2833   for (; idx < 48-2; ++idx) {
2834     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2835     // Search brand string for "yHz" where y is M, G, or T.
2836     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2837       if (brand_string[idx] == 'M') {
2838         multiplier = MEGA;
2839       } else if (brand_string[idx] == 'G') {
2840         multiplier = MEGA * 1000;
2841       } else if (brand_string[idx] == 'T') {
2842         multiplier = MEGA * MEGA;
2843       }
2844       break;
2845     }
2846   }
2847   if (multiplier > 0) {
2848     // Compute frequency (in Hz) from brand string.
2849     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2850       frequency =  (brand_string[idx-4] - '0') * multiplier;
2851       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2852       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2853     } else { // format is "xxxx"
2854       frequency =  (brand_string[idx-4] - '0') * 1000;
2855       frequency += (brand_string[idx-3] - '0') * 100;
2856       frequency += (brand_string[idx-2] - '0') * 10;
2857       frequency += (brand_string[idx-1] - '0');
2858       frequency *= multiplier;
2859     }
2860   }
2861   return frequency;
2862 }
2863 
2864 
2865 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2866   if (_max_qualified_cpu_frequency == 0) {
2867     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2868   }
2869   return _max_qualified_cpu_frequency;
2870 }
2871 
2872 uint64_t VM_Version::feature_flags() {
2873   uint64_t result = 0;
2874   if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
2875     result |= CPU_CX8;
2876   if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
2877     result |= CPU_CMOV;
2878   if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
2879     result |= CPU_FLUSH;
2880 #ifdef _LP64
2881   // clflush should always be available on x86_64
2882   // if not we are in real trouble because we rely on it
2883   // to flush the code cache.
2884   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2885 #endif
2886   if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2887       _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
2888     result |= CPU_FXSR;
2889   // HT flag is set for multi-core processors also.
2890   if (threads_per_core() > 1)
2891     result |= CPU_HT;
2892   if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2893       _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
2894     result |= CPU_MMX;
2895   if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
2896     result |= CPU_SSE;
2897   if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
2898     result |= CPU_SSE2;
2899   if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
2900     result |= CPU_SSE3;
2901   if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
2902     result |= CPU_SSSE3;
2903   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
2904     result |= CPU_SSE4_1;
2905   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
2906     result |= CPU_SSE4_2;
2907   if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
2908     result |= CPU_POPCNT;
2909   if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
2910       _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
2911       _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
2912       _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
2913     result |= CPU_AVX;
2914     result |= CPU_VZEROUPPER;
2915     if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
2916       result |= CPU_F16C;
2917     if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
2918       result |= CPU_AVX2;
2919     if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
2920         _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
2921         _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
2922         _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
2923       result |= CPU_AVX512F;
2924       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
2925         result |= CPU_AVX512CD;
2926       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
2927         result |= CPU_AVX512DQ;
2928       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0)
2929         result |= CPU_AVX512_IFMA;
2930       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
2931         result |= CPU_AVX512PF;
2932       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
2933         result |= CPU_AVX512ER;
2934       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
2935         result |= CPU_AVX512BW;
2936       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
2937         result |= CPU_AVX512VL;
2938       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2939         result |= CPU_AVX512_VPOPCNTDQ;
2940       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2941         result |= CPU_AVX512_VPCLMULQDQ;
2942       if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
2943         result |= CPU_AVX512_VAES;
2944       if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
2945         result |= CPU_GFNI;
2946       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
2947         result |= CPU_AVX512_VNNI;
2948       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2949         result |= CPU_AVX512_BITALG;
2950       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2951         result |= CPU_AVX512_VBMI;
2952       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2953         result |= CPU_AVX512_VBMI2;
2954     }
2955   }
2956   if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
2957     result |= CPU_HV;
2958   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
2959     result |= CPU_BMI1;
2960   if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
2961     result |= CPU_TSC;
2962   if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
2963     result |= CPU_TSCINV_BIT;
2964   if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
2965     result |= CPU_AES;
2966   if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
2967     result |= CPU_ERMS;
2968   if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2969     result |= CPU_FSRM;
2970   if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
2971     result |= CPU_CLMUL;
2972   if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
2973     result |= CPU_RTM;
2974   if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
2975      result |= CPU_ADX;
2976   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
2977     result |= CPU_BMI2;
2978   if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
2979     result |= CPU_SHA;
2980   if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
2981     result |= CPU_FMA;
2982   if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
2983     result |= CPU_FLUSHOPT;
2984   if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
2985     result |= CPU_RDTSCP;
2986   if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
2987     result |= CPU_RDPID;
2988 
2989   // AMD|Hygon features.
2990   if (is_amd_family()) {
2991     if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
2992         (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
2993       result |= CPU_3DNOW_PREFETCH;
2994     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
2995       result |= CPU_LZCNT;
2996     if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
2997       result |= CPU_SSE4A;
2998   }
2999 
3000   // Intel features.
3001   if (is_intel()) {
3002     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3003       result |= CPU_LZCNT;
3004     }
3005     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3006       result |= CPU_3DNOW_PREFETCH;
3007     }
3008     if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
3009       result |= CPU_CLWB;
3010     }
3011     if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
3012       result |= CPU_SERIALIZE;
3013   }
3014 
3015   // ZX features.
3016   if (is_zx()) {
3017     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3018       result |= CPU_LZCNT;
3019     }
3020     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3021       result |= CPU_3DNOW_PREFETCH;
3022     }
3023   }
3024 
3025   // Protection key features.
3026   if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) {
3027     result |= CPU_PKU;
3028   }
3029   if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) {
3030     result |= CPU_OSPKE;
3031   }
3032 
3033   // Control flow enforcement (CET) features.
3034   if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) {
3035     result |= CPU_CET_SS;
3036   }
3037   if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) {
3038     result |= CPU_CET_IBT;
3039   }
3040 
3041   // Composite features.
3042   if (supports_tscinv_bit() &&
3043       ((is_amd_family() && !is_amd_Barcelona()) ||
3044        is_intel_tsc_synched_at_init())) {
3045     result |= CPU_TSCINV;
3046   }
3047 
3048   return result;
3049 }
3050 
3051 bool VM_Version::os_supports_avx_vectors() {
3052   bool retVal = false;
3053   int nreg = 2 LP64_ONLY(+2);
3054   if (supports_evex()) {
3055     // Verify that OS save/restore all bits of EVEX registers
3056     // during signal processing.
3057     retVal = true;
3058     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3059       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3060         retVal = false;
3061         break;
3062       }
3063     }
3064   } else if (supports_avx()) {
3065     // Verify that OS save/restore all bits of AVX registers
3066     // during signal processing.
3067     retVal = true;
3068     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3069       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3070         retVal = false;
3071         break;
3072       }
3073     }
3074     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3075     if (retVal == false) {
3076       // Verify that OS save/restore all bits of EVEX registers
3077       // during signal processing.
3078       retVal = true;
3079       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3080         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3081           retVal = false;
3082           break;
3083         }
3084       }
3085     }
3086   }
3087   return retVal;
3088 }
3089 
3090 uint VM_Version::cores_per_cpu() {
3091   uint result = 1;
3092   if (is_intel()) {
3093     bool supports_topology = supports_processor_topology();
3094     if (supports_topology) {
3095       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3096                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3097     }
3098     if (!supports_topology || result == 0) {
3099       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3100     }
3101   } else if (is_amd_family()) {
3102     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3103   } else if (is_zx()) {
3104     bool supports_topology = supports_processor_topology();
3105     if (supports_topology) {
3106       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3107                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3108     }
3109     if (!supports_topology || result == 0) {
3110       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3111     }
3112   }
3113   return result;
3114 }
3115 
3116 uint VM_Version::threads_per_core() {
3117   uint result = 1;
3118   if (is_intel() && supports_processor_topology()) {
3119     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3120   } else if (is_zx() && supports_processor_topology()) {
3121     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3122   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3123     if (cpu_family() >= 0x17) {
3124       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3125     } else {
3126       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3127                  cores_per_cpu();
3128     }
3129   }
3130   return (result == 0 ? 1 : result);
3131 }
3132 
3133 intx VM_Version::L1_line_size() {
3134   intx result = 0;
3135   if (is_intel()) {
3136     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3137   } else if (is_amd_family()) {
3138     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3139   } else if (is_zx()) {
3140     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3141   }
3142   if (result < 32) // not defined ?
3143     result = 32;   // 32 bytes by default on x86 and other x64
3144   return result;
3145 }
3146 
3147 bool VM_Version::is_intel_tsc_synched_at_init() {
3148   if (is_intel_family_core()) {
3149     uint32_t ext_model = extended_cpu_model();
3150     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3151         ext_model == CPU_MODEL_WESTMERE_EP    ||
3152         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3153         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3154       // <= 2-socket invariant tsc support. EX versions are usually used
3155       // in > 2-socket systems and likely don't synchronize tscs at
3156       // initialization.
3157       // Code that uses tsc values must be prepared for them to arbitrarily
3158       // jump forward or backward.
3159       return true;
3160     }
3161   }
3162   return false;
3163 }
3164 
3165 intx VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3166   // Hardware prefetching (distance/size in bytes):
3167   // Pentium 3 -  64 /  32
3168   // Pentium 4 - 256 / 128
3169   // Athlon    -  64 /  32 ????
3170   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3171   // Core      - 128 /  64
3172   //
3173   // Software prefetching (distance in bytes / instruction with best score):
3174   // Pentium 3 - 128 / prefetchnta
3175   // Pentium 4 - 512 / prefetchnta
3176   // Athlon    - 128 / prefetchnta
3177   // Opteron   - 256 / prefetchnta
3178   // Core      - 256 / prefetchnta
3179   // It will be used only when AllocatePrefetchStyle > 0
3180 
3181   if (is_amd_family()) { // AMD | Hygon
3182     if (supports_sse2()) {
3183       return 256; // Opteron
3184     } else {
3185       return 128; // Athlon
3186     }
3187   } else { // Intel
3188     if (supports_sse3() && cpu_family() == 6) {
3189       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3190         return 192;
3191       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3192 #ifdef _LP64
3193         return 384;
3194 #else
3195         return 320;
3196 #endif
3197       }
3198     }
3199     if (supports_sse2()) {
3200       if (cpu_family() == 6) {
3201         return 256; // Pentium M, Core, Core2
3202       } else {
3203         return 512; // Pentium 4
3204       }
3205     } else {
3206       return 128; // Pentium 3 (and all other old CPUs)
3207     }
3208   }
3209 }