1 /*
   2  * Copyright (c) 1997, 2023, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "asm/macroAssembler.hpp"
  27 #include "asm/macroAssembler.inline.hpp"
  28 #include "classfile/vmIntrinsics.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "compiler/compilerDefinitions.inline.hpp"
  31 #include "jvm.h"
  32 #include "logging/log.hpp"
  33 #include "logging/logStream.hpp"
  34 #include "memory/resourceArea.hpp"
  35 #include "memory/universe.hpp"
  36 #include "runtime/globals_extension.hpp"
  37 #include "runtime/java.hpp"
  38 #include "runtime/os.inline.hpp"
  39 #include "runtime/stubCodeGenerator.hpp"
  40 #include "runtime/vm_version.hpp"
  41 #include "utilities/powerOfTwo.hpp"
  42 #include "utilities/virtualizationSupport.hpp"
  43 
  44 int VM_Version::_cpu;
  45 int VM_Version::_model;
  46 int VM_Version::_stepping;
  47 bool VM_Version::_has_intel_jcc_erratum;
  48 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  49 
  50 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  51 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  52 #undef DECLARE_CPU_FEATURE_FLAG
  53 
  54 // Address of instruction which causes SEGV
  55 address VM_Version::_cpuinfo_segv_addr = 0;
  56 // Address of instruction after the one which causes SEGV
  57 address VM_Version::_cpuinfo_cont_addr = 0;
  58 
  59 static BufferBlob* stub_blob;
  60 static const int stub_size = 2000;
  61 
  62 extern "C" {
  63   typedef void (*get_cpu_info_stub_t)(void*);
  64   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  65 }
  66 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
  67 static detect_virt_stub_t detect_virt_stub = nullptr;
  68 
  69 #ifdef _LP64
  70 
  71 bool VM_Version::supports_clflush() {
  72   // clflush should always be available on x86_64
  73   // if not we are in real trouble because we rely on it
  74   // to flush the code cache.
  75   // Unfortunately, Assembler::clflush is currently called as part
  76   // of generation of the code cache flush routine. This happens
  77   // under Universe::init before the processor features are set
  78   // up. Assembler::flush calls this routine to check that clflush
  79   // is allowed. So, we give the caller a free pass if Universe init
  80   // is still in progress.
  81   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  82   return true;
  83 }
  84 #endif
  85 
  86 #define CPUID_STANDARD_FN   0x0
  87 #define CPUID_STANDARD_FN_1 0x1
  88 #define CPUID_STANDARD_FN_4 0x4
  89 #define CPUID_STANDARD_FN_B 0xb
  90 
  91 #define CPUID_EXTENDED_FN   0x80000000
  92 #define CPUID_EXTENDED_FN_1 0x80000001
  93 #define CPUID_EXTENDED_FN_2 0x80000002
  94 #define CPUID_EXTENDED_FN_3 0x80000003
  95 #define CPUID_EXTENDED_FN_4 0x80000004
  96 #define CPUID_EXTENDED_FN_7 0x80000007
  97 #define CPUID_EXTENDED_FN_8 0x80000008
  98 
  99 class VM_Version_StubGenerator: public StubCodeGenerator {
 100  public:
 101 
 102   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 103 
 104   address generate_get_cpu_info() {
 105     // Flags to test CPU type.
 106     const uint32_t HS_EFL_AC = 0x40000;
 107     const uint32_t HS_EFL_ID = 0x200000;
 108     // Values for when we don't have a CPUID instruction.
 109     const int      CPU_FAMILY_SHIFT = 8;
 110     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 111     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 112     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 113 
 114     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 115     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 116     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 117 
 118     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 119 #   define __ _masm->
 120 
 121     address start = __ pc();
 122 
 123     //
 124     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 125     //
 126     // LP64: rcx and rdx are first and second argument registers on windows
 127 
 128     __ push(rbp);
 129 #ifdef _LP64
 130     __ mov(rbp, c_rarg0); // cpuid_info address
 131 #else
 132     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 133 #endif
 134     __ push(rbx);
 135     __ push(rsi);
 136     __ pushf();          // preserve rbx, and flags
 137     __ pop(rax);
 138     __ push(rax);
 139     __ mov(rcx, rax);
 140     //
 141     // if we are unable to change the AC flag, we have a 386
 142     //
 143     __ xorl(rax, HS_EFL_AC);
 144     __ push(rax);
 145     __ popf();
 146     __ pushf();
 147     __ pop(rax);
 148     __ cmpptr(rax, rcx);
 149     __ jccb(Assembler::notEqual, detect_486);
 150 
 151     __ movl(rax, CPU_FAMILY_386);
 152     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 153     __ jmp(done);
 154 
 155     //
 156     // If we are unable to change the ID flag, we have a 486 which does
 157     // not support the "cpuid" instruction.
 158     //
 159     __ bind(detect_486);
 160     __ mov(rax, rcx);
 161     __ xorl(rax, HS_EFL_ID);
 162     __ push(rax);
 163     __ popf();
 164     __ pushf();
 165     __ pop(rax);
 166     __ cmpptr(rcx, rax);
 167     __ jccb(Assembler::notEqual, detect_586);
 168 
 169     __ bind(cpu486);
 170     __ movl(rax, CPU_FAMILY_486);
 171     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 172     __ jmp(done);
 173 
 174     //
 175     // At this point, we have a chip which supports the "cpuid" instruction
 176     //
 177     __ bind(detect_586);
 178     __ xorl(rax, rax);
 179     __ cpuid();
 180     __ orl(rax, rax);
 181     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 182                                         // value of at least 1, we give up and
 183                                         // assume a 486
 184     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 185     __ movl(Address(rsi, 0), rax);
 186     __ movl(Address(rsi, 4), rbx);
 187     __ movl(Address(rsi, 8), rcx);
 188     __ movl(Address(rsi,12), rdx);
 189 
 190     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 191     __ jccb(Assembler::belowEqual, std_cpuid4);
 192 
 193     //
 194     // cpuid(0xB) Processor Topology
 195     //
 196     __ movl(rax, 0xb);
 197     __ xorl(rcx, rcx);   // Threads level
 198     __ cpuid();
 199 
 200     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 201     __ movl(Address(rsi, 0), rax);
 202     __ movl(Address(rsi, 4), rbx);
 203     __ movl(Address(rsi, 8), rcx);
 204     __ movl(Address(rsi,12), rdx);
 205 
 206     __ movl(rax, 0xb);
 207     __ movl(rcx, 1);     // Cores level
 208     __ cpuid();
 209     __ push(rax);
 210     __ andl(rax, 0x1f);  // Determine if valid topology level
 211     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 212     __ andl(rax, 0xffff);
 213     __ pop(rax);
 214     __ jccb(Assembler::equal, std_cpuid4);
 215 
 216     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 217     __ movl(Address(rsi, 0), rax);
 218     __ movl(Address(rsi, 4), rbx);
 219     __ movl(Address(rsi, 8), rcx);
 220     __ movl(Address(rsi,12), rdx);
 221 
 222     __ movl(rax, 0xb);
 223     __ movl(rcx, 2);     // Packages level
 224     __ cpuid();
 225     __ push(rax);
 226     __ andl(rax, 0x1f);  // Determine if valid topology level
 227     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 228     __ andl(rax, 0xffff);
 229     __ pop(rax);
 230     __ jccb(Assembler::equal, std_cpuid4);
 231 
 232     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 233     __ movl(Address(rsi, 0), rax);
 234     __ movl(Address(rsi, 4), rbx);
 235     __ movl(Address(rsi, 8), rcx);
 236     __ movl(Address(rsi,12), rdx);
 237 
 238     //
 239     // cpuid(0x4) Deterministic cache params
 240     //
 241     __ bind(std_cpuid4);
 242     __ movl(rax, 4);
 243     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 244     __ jccb(Assembler::greater, std_cpuid1);
 245 
 246     __ xorl(rcx, rcx);   // L1 cache
 247     __ cpuid();
 248     __ push(rax);
 249     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 250     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 251     __ pop(rax);
 252     __ jccb(Assembler::equal, std_cpuid1);
 253 
 254     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 255     __ movl(Address(rsi, 0), rax);
 256     __ movl(Address(rsi, 4), rbx);
 257     __ movl(Address(rsi, 8), rcx);
 258     __ movl(Address(rsi,12), rdx);
 259 
 260     //
 261     // Standard cpuid(0x1)
 262     //
 263     __ bind(std_cpuid1);
 264     __ movl(rax, 1);
 265     __ cpuid();
 266     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 267     __ movl(Address(rsi, 0), rax);
 268     __ movl(Address(rsi, 4), rbx);
 269     __ movl(Address(rsi, 8), rcx);
 270     __ movl(Address(rsi,12), rdx);
 271 
 272     //
 273     // Check if OS has enabled XGETBV instruction to access XCR0
 274     // (OSXSAVE feature flag) and CPU supports AVX
 275     //
 276     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 277     __ cmpl(rcx, 0x18000000);
 278     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 279 
 280     //
 281     // XCR0, XFEATURE_ENABLED_MASK register
 282     //
 283     __ xorl(rcx, rcx);   // zero for XCR0 register
 284     __ xgetbv();
 285     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 286     __ movl(Address(rsi, 0), rax);
 287     __ movl(Address(rsi, 4), rdx);
 288 
 289     //
 290     // cpuid(0x7) Structured Extended Features
 291     //
 292     __ bind(sef_cpuid);
 293     __ movl(rax, 7);
 294     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 295     __ jccb(Assembler::greater, ext_cpuid);
 296 
 297     __ xorl(rcx, rcx);
 298     __ cpuid();
 299     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 300     __ movl(Address(rsi, 0), rax);
 301     __ movl(Address(rsi, 4), rbx);
 302     __ movl(Address(rsi, 8), rcx);
 303     __ movl(Address(rsi, 12), rdx);
 304 
 305     //
 306     // Extended cpuid(0x80000000)
 307     //
 308     __ bind(ext_cpuid);
 309     __ movl(rax, 0x80000000);
 310     __ cpuid();
 311     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 312     __ jcc(Assembler::belowEqual, done);
 313     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 314     __ jcc(Assembler::belowEqual, ext_cpuid1);
 315     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 316     __ jccb(Assembler::belowEqual, ext_cpuid5);
 317     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 318     __ jccb(Assembler::belowEqual, ext_cpuid7);
 319     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 320     __ jccb(Assembler::belowEqual, ext_cpuid8);
 321     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 322     __ jccb(Assembler::below, ext_cpuid8);
 323     //
 324     // Extended cpuid(0x8000001E)
 325     //
 326     __ movl(rax, 0x8000001E);
 327     __ cpuid();
 328     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 329     __ movl(Address(rsi, 0), rax);
 330     __ movl(Address(rsi, 4), rbx);
 331     __ movl(Address(rsi, 8), rcx);
 332     __ movl(Address(rsi,12), rdx);
 333 
 334     //
 335     // Extended cpuid(0x80000008)
 336     //
 337     __ bind(ext_cpuid8);
 338     __ movl(rax, 0x80000008);
 339     __ cpuid();
 340     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 341     __ movl(Address(rsi, 0), rax);
 342     __ movl(Address(rsi, 4), rbx);
 343     __ movl(Address(rsi, 8), rcx);
 344     __ movl(Address(rsi,12), rdx);
 345 
 346     //
 347     // Extended cpuid(0x80000007)
 348     //
 349     __ bind(ext_cpuid7);
 350     __ movl(rax, 0x80000007);
 351     __ cpuid();
 352     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 353     __ movl(Address(rsi, 0), rax);
 354     __ movl(Address(rsi, 4), rbx);
 355     __ movl(Address(rsi, 8), rcx);
 356     __ movl(Address(rsi,12), rdx);
 357 
 358     //
 359     // Extended cpuid(0x80000005)
 360     //
 361     __ bind(ext_cpuid5);
 362     __ movl(rax, 0x80000005);
 363     __ cpuid();
 364     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 365     __ movl(Address(rsi, 0), rax);
 366     __ movl(Address(rsi, 4), rbx);
 367     __ movl(Address(rsi, 8), rcx);
 368     __ movl(Address(rsi,12), rdx);
 369 
 370     //
 371     // Extended cpuid(0x80000001)
 372     //
 373     __ bind(ext_cpuid1);
 374     __ movl(rax, 0x80000001);
 375     __ cpuid();
 376     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 377     __ movl(Address(rsi, 0), rax);
 378     __ movl(Address(rsi, 4), rbx);
 379     __ movl(Address(rsi, 8), rcx);
 380     __ movl(Address(rsi,12), rdx);
 381 
 382     //
 383     // Check if OS has enabled XGETBV instruction to access XCR0
 384     // (OSXSAVE feature flag) and CPU supports AVX
 385     //
 386     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 387     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 388     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 389     __ cmpl(rcx, 0x18000000);
 390     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 391 
 392     __ movl(rax, 0x6);
 393     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 394     __ cmpl(rax, 0x6);
 395     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 396 
 397     // we need to bridge farther than imm8, so we use this island as a thunk
 398     __ bind(done);
 399     __ jmp(wrapup);
 400 
 401     __ bind(start_simd_check);
 402     //
 403     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 404     // registers are not restored after a signal processing.
 405     // Generate SEGV here (reference through null)
 406     // and check upper YMM/ZMM bits after it.
 407     //
 408     int saved_useavx = UseAVX;
 409     int saved_usesse = UseSSE;
 410 
 411     // If UseAVX is uninitialized or is set by the user to include EVEX
 412     if (use_evex) {
 413       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 414       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 415       __ movl(rax, 0x10000);
 416       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 417       __ cmpl(rax, 0x10000);
 418       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 419       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 420       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 421       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 422       __ movl(rax, 0xE0);
 423       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 424       __ cmpl(rax, 0xE0);
 425       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 426 
 427       if (FLAG_IS_DEFAULT(UseAVX)) {
 428         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 429         __ movl(rax, Address(rsi, 0));
 430         __ cmpl(rax, 0x50654);              // If it is Skylake
 431         __ jcc(Assembler::equal, legacy_setup);
 432       }
 433       // EVEX setup: run in lowest evex mode
 434       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 435       UseAVX = 3;
 436       UseSSE = 2;
 437 #ifdef _WINDOWS
 438       // xmm5-xmm15 are not preserved by caller on windows
 439       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 440       __ subptr(rsp, 64);
 441       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 442 #ifdef _LP64
 443       __ subptr(rsp, 64);
 444       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 445       __ subptr(rsp, 64);
 446       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 447 #endif // _LP64
 448 #endif // _WINDOWS
 449 
 450       // load value into all 64 bytes of zmm7 register
 451       __ movl(rcx, VM_Version::ymm_test_value());
 452       __ movdl(xmm0, rcx);
 453       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 454       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 455 #ifdef _LP64
 456       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 457       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 458 #endif
 459       VM_Version::clean_cpuFeatures();
 460       __ jmp(save_restore_except);
 461     }
 462 
 463     __ bind(legacy_setup);
 464     // AVX setup
 465     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 466     UseAVX = 1;
 467     UseSSE = 2;
 468 #ifdef _WINDOWS
 469     __ subptr(rsp, 32);
 470     __ vmovdqu(Address(rsp, 0), xmm7);
 471 #ifdef _LP64
 472     __ subptr(rsp, 32);
 473     __ vmovdqu(Address(rsp, 0), xmm8);
 474     __ subptr(rsp, 32);
 475     __ vmovdqu(Address(rsp, 0), xmm15);
 476 #endif // _LP64
 477 #endif // _WINDOWS
 478 
 479     // load value into all 32 bytes of ymm7 register
 480     __ movl(rcx, VM_Version::ymm_test_value());
 481 
 482     __ movdl(xmm0, rcx);
 483     __ pshufd(xmm0, xmm0, 0x00);
 484     __ vinsertf128_high(xmm0, xmm0);
 485     __ vmovdqu(xmm7, xmm0);
 486 #ifdef _LP64
 487     __ vmovdqu(xmm8, xmm0);
 488     __ vmovdqu(xmm15, xmm0);
 489 #endif
 490     VM_Version::clean_cpuFeatures();
 491 
 492     __ bind(save_restore_except);
 493     __ xorl(rsi, rsi);
 494     VM_Version::set_cpuinfo_segv_addr(__ pc());
 495     // Generate SEGV
 496     __ movl(rax, Address(rsi, 0));
 497 
 498     VM_Version::set_cpuinfo_cont_addr(__ pc());
 499     // Returns here after signal. Save xmm0 to check it later.
 500 
 501     // If UseAVX is uninitialized or is set by the user to include EVEX
 502     if (use_evex) {
 503       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 504       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 505       __ movl(rax, 0x10000);
 506       __ andl(rax, Address(rsi, 4));
 507       __ cmpl(rax, 0x10000);
 508       __ jcc(Assembler::notEqual, legacy_save_restore);
 509       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 510       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 511       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 512       __ movl(rax, 0xE0);
 513       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 514       __ cmpl(rax, 0xE0);
 515       __ jcc(Assembler::notEqual, legacy_save_restore);
 516 
 517       if (FLAG_IS_DEFAULT(UseAVX)) {
 518         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 519         __ movl(rax, Address(rsi, 0));
 520         __ cmpl(rax, 0x50654);              // If it is Skylake
 521         __ jcc(Assembler::equal, legacy_save_restore);
 522       }
 523       // EVEX check: run in lowest evex mode
 524       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 525       UseAVX = 3;
 526       UseSSE = 2;
 527       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 528       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 529       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 530 #ifdef _LP64
 531       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 532       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 533 #endif
 534 
 535 #ifdef _WINDOWS
 536 #ifdef _LP64
 537       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 538       __ addptr(rsp, 64);
 539       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 540       __ addptr(rsp, 64);
 541 #endif // _LP64
 542       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 543       __ addptr(rsp, 64);
 544 #endif // _WINDOWS
 545       generate_vzeroupper(wrapup);
 546       VM_Version::clean_cpuFeatures();
 547       UseAVX = saved_useavx;
 548       UseSSE = saved_usesse;
 549       __ jmp(wrapup);
 550    }
 551 
 552     __ bind(legacy_save_restore);
 553     // AVX check
 554     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 555     UseAVX = 1;
 556     UseSSE = 2;
 557     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 558     __ vmovdqu(Address(rsi, 0), xmm0);
 559     __ vmovdqu(Address(rsi, 32), xmm7);
 560 #ifdef _LP64
 561     __ vmovdqu(Address(rsi, 64), xmm8);
 562     __ vmovdqu(Address(rsi, 96), xmm15);
 563 #endif
 564 
 565 #ifdef _WINDOWS
 566 #ifdef _LP64
 567     __ vmovdqu(xmm15, Address(rsp, 0));
 568     __ addptr(rsp, 32);
 569     __ vmovdqu(xmm8, Address(rsp, 0));
 570     __ addptr(rsp, 32);
 571 #endif // _LP64
 572     __ vmovdqu(xmm7, Address(rsp, 0));
 573     __ addptr(rsp, 32);
 574 #endif // _WINDOWS
 575     generate_vzeroupper(wrapup);
 576     VM_Version::clean_cpuFeatures();
 577     UseAVX = saved_useavx;
 578     UseSSE = saved_usesse;
 579 
 580     __ bind(wrapup);
 581     __ popf();
 582     __ pop(rsi);
 583     __ pop(rbx);
 584     __ pop(rbp);
 585     __ ret(0);
 586 
 587 #   undef __
 588 
 589     return start;
 590   };
 591   void generate_vzeroupper(Label& L_wrapup) {
 592 #   define __ _masm->
 593     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 594     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 595     __ jcc(Assembler::notEqual, L_wrapup);
 596     __ movl(rcx, 0x0FFF0FF0);
 597     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 598     __ andl(rcx, Address(rsi, 0));
 599     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 600     __ jcc(Assembler::equal, L_wrapup);
 601     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 602     __ jcc(Assembler::equal, L_wrapup);
 603     // vzeroupper() will use a pre-computed instruction sequence that we
 604     // can't compute until after we've determined CPU capabilities. Use
 605     // uncached variant here directly to be able to bootstrap correctly
 606     __ vzeroupper_uncached();
 607 #   undef __
 608   }
 609   address generate_detect_virt() {
 610     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 611 #   define __ _masm->
 612 
 613     address start = __ pc();
 614 
 615     // Evacuate callee-saved registers
 616     __ push(rbp);
 617     __ push(rbx);
 618     __ push(rsi); // for Windows
 619 
 620 #ifdef _LP64
 621     __ mov(rax, c_rarg0); // CPUID leaf
 622     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 623 #else
 624     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 625     __ movptr(rsi, Address(rsp, 20)); // register array address
 626 #endif
 627 
 628     __ cpuid();
 629 
 630     // Store result to register array
 631     __ movl(Address(rsi,  0), rax);
 632     __ movl(Address(rsi,  4), rbx);
 633     __ movl(Address(rsi,  8), rcx);
 634     __ movl(Address(rsi, 12), rdx);
 635 
 636     // Epilogue
 637     __ pop(rsi);
 638     __ pop(rbx);
 639     __ pop(rbp);
 640     __ ret(0);
 641 
 642 #   undef __
 643 
 644     return start;
 645   };
 646 
 647 
 648   address generate_getCPUIDBrandString(void) {
 649     // Flags to test CPU type.
 650     const uint32_t HS_EFL_AC           = 0x40000;
 651     const uint32_t HS_EFL_ID           = 0x200000;
 652     // Values for when we don't have a CPUID instruction.
 653     const int      CPU_FAMILY_SHIFT = 8;
 654     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 655     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 656 
 657     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 658 
 659     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 660 #   define __ _masm->
 661 
 662     address start = __ pc();
 663 
 664     //
 665     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 666     //
 667     // LP64: rcx and rdx are first and second argument registers on windows
 668 
 669     __ push(rbp);
 670 #ifdef _LP64
 671     __ mov(rbp, c_rarg0); // cpuid_info address
 672 #else
 673     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 674 #endif
 675     __ push(rbx);
 676     __ push(rsi);
 677     __ pushf();          // preserve rbx, and flags
 678     __ pop(rax);
 679     __ push(rax);
 680     __ mov(rcx, rax);
 681     //
 682     // if we are unable to change the AC flag, we have a 386
 683     //
 684     __ xorl(rax, HS_EFL_AC);
 685     __ push(rax);
 686     __ popf();
 687     __ pushf();
 688     __ pop(rax);
 689     __ cmpptr(rax, rcx);
 690     __ jccb(Assembler::notEqual, detect_486);
 691 
 692     __ movl(rax, CPU_FAMILY_386);
 693     __ jmp(done);
 694 
 695     //
 696     // If we are unable to change the ID flag, we have a 486 which does
 697     // not support the "cpuid" instruction.
 698     //
 699     __ bind(detect_486);
 700     __ mov(rax, rcx);
 701     __ xorl(rax, HS_EFL_ID);
 702     __ push(rax);
 703     __ popf();
 704     __ pushf();
 705     __ pop(rax);
 706     __ cmpptr(rcx, rax);
 707     __ jccb(Assembler::notEqual, detect_586);
 708 
 709     __ bind(cpu486);
 710     __ movl(rax, CPU_FAMILY_486);
 711     __ jmp(done);
 712 
 713     //
 714     // At this point, we have a chip which supports the "cpuid" instruction
 715     //
 716     __ bind(detect_586);
 717     __ xorl(rax, rax);
 718     __ cpuid();
 719     __ orl(rax, rax);
 720     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 721                                         // value of at least 1, we give up and
 722                                         // assume a 486
 723 
 724     //
 725     // Extended cpuid(0x80000000) for processor brand string detection
 726     //
 727     __ bind(ext_cpuid);
 728     __ movl(rax, CPUID_EXTENDED_FN);
 729     __ cpuid();
 730     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 731     __ jcc(Assembler::below, done);
 732 
 733     //
 734     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 735     //
 736     __ movl(rax, CPUID_EXTENDED_FN_2);
 737     __ cpuid();
 738     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 739     __ movl(Address(rsi, 0), rax);
 740     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 741     __ movl(Address(rsi, 0), rbx);
 742     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 743     __ movl(Address(rsi, 0), rcx);
 744     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 745     __ movl(Address(rsi,0), rdx);
 746 
 747     //
 748     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 749     //
 750     __ movl(rax, CPUID_EXTENDED_FN_3);
 751     __ cpuid();
 752     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 753     __ movl(Address(rsi, 0), rax);
 754     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 755     __ movl(Address(rsi, 0), rbx);
 756     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 757     __ movl(Address(rsi, 0), rcx);
 758     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 759     __ movl(Address(rsi,0), rdx);
 760 
 761     //
 762     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 763     //
 764     __ movl(rax, CPUID_EXTENDED_FN_4);
 765     __ cpuid();
 766     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 767     __ movl(Address(rsi, 0), rax);
 768     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 769     __ movl(Address(rsi, 0), rbx);
 770     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 771     __ movl(Address(rsi, 0), rcx);
 772     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 773     __ movl(Address(rsi,0), rdx);
 774 
 775     //
 776     // return
 777     //
 778     __ bind(done);
 779     __ popf();
 780     __ pop(rsi);
 781     __ pop(rbx);
 782     __ pop(rbp);
 783     __ ret(0);
 784 
 785 #   undef __
 786 
 787     return start;
 788   };
 789 };
 790 
 791 void VM_Version::get_processor_features() {
 792 
 793   _cpu = 4; // 486 by default
 794   _model = 0;
 795   _stepping = 0;
 796   _features = 0;
 797   _logical_processors_per_package = 1;
 798   // i486 internal cache is both I&D and has a 16-byte line size
 799   _L1_data_cache_line_size = 16;
 800 
 801   // Get raw processor info
 802 
 803   get_cpu_info_stub(&_cpuid_info);
 804 
 805   assert_is_initialized();
 806   _cpu = extended_cpu_family();
 807   _model = extended_cpu_model();
 808   _stepping = cpu_stepping();
 809 
 810   if (cpu_family() > 4) { // it supports CPUID
 811     _features = feature_flags();
 812     // Logical processors are only available on P4s and above,
 813     // and only if hyperthreading is available.
 814     _logical_processors_per_package = logical_processor_count();
 815     _L1_data_cache_line_size = L1_line_size();
 816   }
 817 
 818   _supports_cx8 = supports_cmpxchg8();
 819   // xchg and xadd instructions
 820   _supports_atomic_getset4 = true;
 821   _supports_atomic_getadd4 = true;
 822   LP64_ONLY(_supports_atomic_getset8 = true);
 823   LP64_ONLY(_supports_atomic_getadd8 = true);
 824 
 825 #ifdef _LP64
 826   // OS should support SSE for x64 and hardware should support at least SSE2.
 827   if (!VM_Version::supports_sse2()) {
 828     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 829   }
 830   // in 64 bit the use of SSE2 is the minimum
 831   if (UseSSE < 2) UseSSE = 2;
 832 #endif
 833 
 834 #ifdef AMD64
 835   // flush_icache_stub have to be generated first.
 836   // That is why Icache line size is hard coded in ICache class,
 837   // see icache_x86.hpp. It is also the reason why we can't use
 838   // clflush instruction in 32-bit VM since it could be running
 839   // on CPU which does not support it.
 840   //
 841   // The only thing we can do is to verify that flushed
 842   // ICache::line_size has correct value.
 843   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 844   // clflush_size is size in quadwords (8 bytes).
 845   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 846 #endif
 847 
 848 #ifdef _LP64
 849   // assigning this field effectively enables Unsafe.writebackMemory()
 850   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 851   // that is only implemented on x86_64 and only if the OS plays ball
 852   if (os::supports_map_sync()) {
 853     // publish data cache line flush size to generic field, otherwise
 854     // let if default to zero thereby disabling writeback
 855     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 856   }
 857 #endif
 858 
 859   if (UseSSE < 4) {
 860     _features &= ~CPU_SSE4_1;
 861     _features &= ~CPU_SSE4_2;
 862   }
 863 
 864   if (UseSSE < 3) {
 865     _features &= ~CPU_SSE3;
 866     _features &= ~CPU_SSSE3;
 867     _features &= ~CPU_SSE4A;
 868   }
 869 
 870   if (UseSSE < 2)
 871     _features &= ~CPU_SSE2;
 872 
 873   if (UseSSE < 1)
 874     _features &= ~CPU_SSE;
 875 
 876   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 877   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 878     UseAVX = 0;
 879   }
 880 
 881   // UseSSE is set to the smaller of what hardware supports and what
 882   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 883   // older Pentiums which do not support it.
 884   int use_sse_limit = 0;
 885   if (UseSSE > 0) {
 886     if (UseSSE > 3 && supports_sse4_1()) {
 887       use_sse_limit = 4;
 888     } else if (UseSSE > 2 && supports_sse3()) {
 889       use_sse_limit = 3;
 890     } else if (UseSSE > 1 && supports_sse2()) {
 891       use_sse_limit = 2;
 892     } else if (UseSSE > 0 && supports_sse()) {
 893       use_sse_limit = 1;
 894     } else {
 895       use_sse_limit = 0;
 896     }
 897   }
 898   if (FLAG_IS_DEFAULT(UseSSE)) {
 899     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 900   } else if (UseSSE > use_sse_limit) {
 901     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
 902     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 903   }
 904 
 905   // first try initial setting and detect what we can support
 906   int use_avx_limit = 0;
 907   if (UseAVX > 0) {
 908     if (UseSSE < 4) {
 909       // Don't use AVX if SSE is unavailable or has been disabled.
 910       use_avx_limit = 0;
 911     } else if (UseAVX > 2 && supports_evex()) {
 912       use_avx_limit = 3;
 913     } else if (UseAVX > 1 && supports_avx2()) {
 914       use_avx_limit = 2;
 915     } else if (UseAVX > 0 && supports_avx()) {
 916       use_avx_limit = 1;
 917     } else {
 918       use_avx_limit = 0;
 919     }
 920   }
 921   if (FLAG_IS_DEFAULT(UseAVX)) {
 922     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 923     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 924       FLAG_SET_DEFAULT(UseAVX, 2);
 925     } else {
 926       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 927     }
 928   }
 929   if (UseAVX > use_avx_limit) {
 930     if (UseSSE < 4) {
 931       warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
 932     } else {
 933       warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
 934     }
 935     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 936   }
 937 
 938   if (UseAVX < 3) {
 939     _features &= ~CPU_AVX512F;
 940     _features &= ~CPU_AVX512DQ;
 941     _features &= ~CPU_AVX512CD;
 942     _features &= ~CPU_AVX512BW;
 943     _features &= ~CPU_AVX512VL;
 944     _features &= ~CPU_AVX512_VPOPCNTDQ;
 945     _features &= ~CPU_AVX512_VPCLMULQDQ;
 946     _features &= ~CPU_AVX512_VAES;
 947     _features &= ~CPU_AVX512_VNNI;
 948     _features &= ~CPU_AVX512_VBMI;
 949     _features &= ~CPU_AVX512_VBMI2;
 950     _features &= ~CPU_AVX512_BITALG;
 951     _features &= ~CPU_AVX512_IFMA;
 952   }
 953 
 954   if (UseAVX < 2)
 955     _features &= ~CPU_AVX2;
 956 
 957   if (UseAVX < 1) {
 958     _features &= ~CPU_AVX;
 959     _features &= ~CPU_VZEROUPPER;
 960     _features &= ~CPU_F16C;
 961   }
 962 
 963   if (logical_processors_per_package() == 1) {
 964     // HT processor could be installed on a system which doesn't support HT.
 965     _features &= ~CPU_HT;
 966   }
 967 
 968   if (is_intel()) { // Intel cpus specific settings
 969     if (is_knights_family()) {
 970       _features &= ~CPU_VZEROUPPER;
 971       _features &= ~CPU_AVX512BW;
 972       _features &= ~CPU_AVX512VL;
 973       _features &= ~CPU_AVX512DQ;
 974       _features &= ~CPU_AVX512_VNNI;
 975       _features &= ~CPU_AVX512_VAES;
 976       _features &= ~CPU_AVX512_VPOPCNTDQ;
 977       _features &= ~CPU_AVX512_VPCLMULQDQ;
 978       _features &= ~CPU_AVX512_VBMI;
 979       _features &= ~CPU_AVX512_VBMI2;
 980       _features &= ~CPU_CLWB;
 981       _features &= ~CPU_FLUSHOPT;
 982       _features &= ~CPU_GFNI;
 983       _features &= ~CPU_AVX512_BITALG;
 984       _features &= ~CPU_AVX512_IFMA;
 985     }
 986   }
 987 
 988   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
 989     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
 990   } else {
 991     _has_intel_jcc_erratum = IntelJccErratumMitigation;
 992   }
 993 
 994   char buf[1024];
 995   int res = jio_snprintf(
 996               buf, sizeof(buf),
 997               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
 998               cores_per_cpu(), threads_per_core(),
 999               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1000   assert(res > 0, "not enough temporary space allocated");
1001   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
1002 
1003   _features_string = os::strdup(buf);
1004 
1005   // Use AES instructions if available.
1006   if (supports_aes()) {
1007     if (FLAG_IS_DEFAULT(UseAES)) {
1008       FLAG_SET_DEFAULT(UseAES, true);
1009     }
1010     if (!UseAES) {
1011       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1012         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1013       }
1014       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1015     } else {
1016       if (UseSSE > 2) {
1017         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1018           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1019         }
1020       } else {
1021         // The AES intrinsic stubs require AES instruction support (of course)
1022         // but also require sse3 mode or higher for instructions it use.
1023         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1024           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1025         }
1026         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1027       }
1028 
1029       // --AES-CTR begins--
1030       if (!UseAESIntrinsics) {
1031         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1032           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1033           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1034         }
1035       } else {
1036         if (supports_sse4_1()) {
1037           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1038             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1039           }
1040         } else {
1041            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1042            // but also require sse4.1 mode or higher for instructions it use.
1043           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1044              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1045            }
1046            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1047         }
1048       }
1049       // --AES-CTR ends--
1050     }
1051   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1052     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1053       warning("AES instructions are not available on this CPU");
1054       FLAG_SET_DEFAULT(UseAES, false);
1055     }
1056     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1057       warning("AES intrinsics are not available on this CPU");
1058       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1059     }
1060     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1061       warning("AES-CTR intrinsics are not available on this CPU");
1062       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1063     }
1064   }
1065 
1066   // Use CLMUL instructions if available.
1067   if (supports_clmul()) {
1068     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1069       UseCLMUL = true;
1070     }
1071   } else if (UseCLMUL) {
1072     if (!FLAG_IS_DEFAULT(UseCLMUL))
1073       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1074     FLAG_SET_DEFAULT(UseCLMUL, false);
1075   }
1076 
1077   if (UseCLMUL && (UseSSE > 2)) {
1078     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1079       UseCRC32Intrinsics = true;
1080     }
1081   } else if (UseCRC32Intrinsics) {
1082     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1083       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1084     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1085   }
1086 
1087 #ifdef _LP64
1088   if (supports_avx2()) {
1089     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1090       UseAdler32Intrinsics = true;
1091     }
1092   } else if (UseAdler32Intrinsics) {
1093     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1094       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1095     }
1096     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1097   }
1098 #else
1099   if (UseAdler32Intrinsics) {
1100     warning("Adler32Intrinsics not available on this CPU.");
1101     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1102   }
1103 #endif
1104 
1105   if (supports_sse4_2() && supports_clmul()) {
1106     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1107       UseCRC32CIntrinsics = true;
1108     }
1109   } else if (UseCRC32CIntrinsics) {
1110     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1111       warning("CRC32C intrinsics are not available on this CPU");
1112     }
1113     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1114   }
1115 
1116   // GHASH/GCM intrinsics
1117   if (UseCLMUL && (UseSSE > 2)) {
1118     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1119       UseGHASHIntrinsics = true;
1120     }
1121   } else if (UseGHASHIntrinsics) {
1122     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1123       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1124     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1125   }
1126 
1127   // ChaCha20 Intrinsics
1128   // As long as the system supports AVX as a baseline we can do a
1129   // SIMD-enabled block function.  StubGenerator makes the determination
1130   // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1131   // version.
1132   if (UseAVX >= 1) {
1133       if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1134           UseChaCha20Intrinsics = true;
1135       }
1136   } else if (UseChaCha20Intrinsics) {
1137       if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1138           warning("ChaCha20 intrinsic requires AVX instructions");
1139       }
1140       FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1141   }
1142 
1143   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1144   if (UseAVX >= 2) {
1145     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1146       UseBASE64Intrinsics = true;
1147     }
1148   } else if (UseBASE64Intrinsics) {
1149      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1150       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1151     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1152   }
1153 
1154   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1155     if (FLAG_IS_DEFAULT(UseFMA)) {
1156       UseFMA = true;
1157     }
1158   } else if (UseFMA) {
1159     warning("FMA instructions are not available on this CPU");
1160     FLAG_SET_DEFAULT(UseFMA, false);
1161   }
1162 
1163   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1164     UseMD5Intrinsics = true;
1165   }
1166 
1167   if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
1168     if (FLAG_IS_DEFAULT(UseSHA)) {
1169       UseSHA = true;
1170     }
1171   } else if (UseSHA) {
1172     warning("SHA instructions are not available on this CPU");
1173     FLAG_SET_DEFAULT(UseSHA, false);
1174   }
1175 
1176   if (supports_sha() && supports_sse4_1() && UseSHA) {
1177     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1178       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1179     }
1180   } else if (UseSHA1Intrinsics) {
1181     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1182     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1183   }
1184 
1185   if (supports_sse4_1() && UseSHA) {
1186     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1187       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1188     }
1189   } else if (UseSHA256Intrinsics) {
1190     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1191     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1192   }
1193 
1194 #ifdef _LP64
1195   // These are only supported on 64-bit
1196   if (UseSHA && supports_avx2() && supports_bmi2()) {
1197     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1198       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1199     }
1200   } else
1201 #endif
1202   if (UseSHA512Intrinsics) {
1203     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1204     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1205   }
1206 
1207   if (UseSHA3Intrinsics) {
1208     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1209     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1210   }
1211 
1212   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1213     FLAG_SET_DEFAULT(UseSHA, false);
1214   }
1215 
1216   if (!supports_rtm() && UseRTMLocking) {
1217     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1218   }
1219 
1220 #if INCLUDE_RTM_OPT
1221   if (UseRTMLocking) {
1222     if (!CompilerConfig::is_c2_enabled()) {
1223       // Only C2 does RTM locking optimization.
1224       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1225     }
1226     if (is_intel_family_core()) {
1227       if ((_model == CPU_MODEL_HASWELL_E3) ||
1228           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1229           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1230         // currently a collision between SKL and HSW_E3
1231         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1232           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1233                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1234         } else {
1235           warning("UseRTMLocking is only available as experimental option on this platform.");
1236         }
1237       }
1238     }
1239     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1240       // RTM locking should be used only for applications with
1241       // high lock contention. For now we do not use it by default.
1242       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1243     }
1244   } else { // !UseRTMLocking
1245     if (UseRTMForStackLocks) {
1246       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1247         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1248       }
1249       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1250     }
1251     if (UseRTMDeopt) {
1252       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1253     }
1254     if (PrintPreciseRTMLockingStatistics) {
1255       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1256     }
1257   }
1258 #else
1259   if (UseRTMLocking) {
1260     // Only C2 does RTM locking optimization.
1261     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1262   }
1263 #endif
1264 
1265 #ifdef COMPILER2
1266   if (UseFPUForSpilling) {
1267     if (UseSSE < 2) {
1268       // Only supported with SSE2+
1269       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1270     }
1271   }
1272 #endif
1273 
1274 #if COMPILER2_OR_JVMCI
1275   int max_vector_size = 0;
1276   if (UseSSE < 2) {
1277     // Vectors (in XMM) are only supported with SSE2+
1278     // SSE is always 2 on x64.
1279     max_vector_size = 0;
1280   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1281     // 16 byte vectors (in XMM) are supported with SSE2+
1282     max_vector_size = 16;
1283   } else if (UseAVX == 1 || UseAVX == 2) {
1284     // 32 bytes vectors (in YMM) are only supported with AVX+
1285     max_vector_size = 32;
1286   } else if (UseAVX > 2) {
1287     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1288     max_vector_size = 64;
1289   }
1290 
1291 #ifdef _LP64
1292   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1293 #else
1294   int min_vector_size = 0;
1295 #endif
1296 
1297   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1298     if (MaxVectorSize < min_vector_size) {
1299       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1300       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1301     }
1302     if (MaxVectorSize > max_vector_size) {
1303       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1304       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1305     }
1306     if (!is_power_of_2(MaxVectorSize)) {
1307       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1308       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1309     }
1310   } else {
1311     // If default, use highest supported configuration
1312     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1313   }
1314 
1315 #if defined(COMPILER2) && defined(ASSERT)
1316   if (MaxVectorSize > 0) {
1317     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1318       tty->print_cr("State of YMM registers after signal handle:");
1319       int nreg = 2 LP64_ONLY(+2);
1320       const char* ymm_name[4] = {"0", "7", "8", "15"};
1321       for (int i = 0; i < nreg; i++) {
1322         tty->print("YMM%s:", ymm_name[i]);
1323         for (int j = 7; j >=0; j--) {
1324           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1325         }
1326         tty->cr();
1327       }
1328     }
1329   }
1330 #endif // COMPILER2 && ASSERT
1331 
1332 #ifdef _LP64
1333   if (supports_avx512ifma() && supports_avx512vlbw() && MaxVectorSize >= 64) {
1334     if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1335       FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1336     }
1337   } else
1338 #endif
1339   if (UsePoly1305Intrinsics) {
1340     warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1341     FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1342   }
1343 
1344 #ifdef _LP64
1345   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1346     UseMultiplyToLenIntrinsic = true;
1347   }
1348   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1349     UseSquareToLenIntrinsic = true;
1350   }
1351   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1352     UseMulAddIntrinsic = true;
1353   }
1354   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1355     UseMontgomeryMultiplyIntrinsic = true;
1356   }
1357   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1358     UseMontgomerySquareIntrinsic = true;
1359   }
1360 #else
1361   if (UseMultiplyToLenIntrinsic) {
1362     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1363       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1364     }
1365     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1366   }
1367   if (UseMontgomeryMultiplyIntrinsic) {
1368     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1369       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1370     }
1371     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1372   }
1373   if (UseMontgomerySquareIntrinsic) {
1374     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1375       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1376     }
1377     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1378   }
1379   if (UseSquareToLenIntrinsic) {
1380     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1381       warning("squareToLen intrinsic is not available in 32-bit VM");
1382     }
1383     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1384   }
1385   if (UseMulAddIntrinsic) {
1386     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1387       warning("mulAdd intrinsic is not available in 32-bit VM");
1388     }
1389     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1390   }
1391 #endif // _LP64
1392 #endif // COMPILER2_OR_JVMCI
1393 
1394   // On new cpus instructions which update whole XMM register should be used
1395   // to prevent partial register stall due to dependencies on high half.
1396   //
1397   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1398   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1399   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1400   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1401 
1402 
1403   if (is_zx()) { // ZX cpus specific settings
1404     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1405       UseStoreImmI16 = false; // don't use it on ZX cpus
1406     }
1407     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1408       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1409         // Use it on all ZX cpus
1410         UseAddressNop = true;
1411       }
1412     }
1413     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1414       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1415     }
1416     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1417       if (supports_sse3()) {
1418         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1419       } else {
1420         UseXmmRegToRegMoveAll = false;
1421       }
1422     }
1423     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1424 #ifdef COMPILER2
1425       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1426         // For new ZX cpus do the next optimization:
1427         // don't align the beginning of a loop if there are enough instructions
1428         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1429         // in current fetch line (OptoLoopAlignment) or the padding
1430         // is big (> MaxLoopPad).
1431         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1432         // generated NOP instructions. 11 is the largest size of one
1433         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1434         MaxLoopPad = 11;
1435       }
1436 #endif // COMPILER2
1437       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1438         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1439       }
1440       if (supports_sse4_2()) { // new ZX cpus
1441         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1442           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1443         }
1444       }
1445       if (supports_sse4_2()) {
1446         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1447           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1448         }
1449       } else {
1450         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1451           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1452         }
1453         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1454       }
1455     }
1456 
1457     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1458       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1459     }
1460   }
1461 
1462   if (is_amd_family()) { // AMD cpus specific settings
1463     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1464       // Use it on new AMD cpus starting from Opteron.
1465       UseAddressNop = true;
1466     }
1467     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1468       // Use it on new AMD cpus starting from Opteron.
1469       UseNewLongLShift = true;
1470     }
1471     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1472       if (supports_sse4a()) {
1473         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1474       } else {
1475         UseXmmLoadAndClearUpper = false;
1476       }
1477     }
1478     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1479       if (supports_sse4a()) {
1480         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1481       } else {
1482         UseXmmRegToRegMoveAll = false;
1483       }
1484     }
1485     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1486       if (supports_sse4a()) {
1487         UseXmmI2F = true;
1488       } else {
1489         UseXmmI2F = false;
1490       }
1491     }
1492     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1493       if (supports_sse4a()) {
1494         UseXmmI2D = true;
1495       } else {
1496         UseXmmI2D = false;
1497       }
1498     }
1499     if (supports_sse4_2()) {
1500       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1501         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1502       }
1503     } else {
1504       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1505         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1506       }
1507       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1508     }
1509 
1510     // some defaults for AMD family 15h
1511     if (cpu_family() == 0x15) {
1512       // On family 15h processors default is no sw prefetch
1513       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1514         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1515       }
1516       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1517       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1518         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1519       }
1520       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1521       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1522         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1523       }
1524       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1525         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1526       }
1527     }
1528 
1529 #ifdef COMPILER2
1530     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1531       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1532       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1533     }
1534 #endif // COMPILER2
1535 
1536     // Some defaults for AMD family >= 17h && Hygon family 18h
1537     if (cpu_family() >= 0x17) {
1538       // On family >=17h processors use XMM and UnalignedLoadStores
1539       // for Array Copy
1540       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1541         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1542       }
1543       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1544         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1545       }
1546 #ifdef COMPILER2
1547       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1548         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1549       }
1550 #endif
1551     }
1552   }
1553 
1554   if (is_intel()) { // Intel cpus specific settings
1555     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1556       UseStoreImmI16 = false; // don't use it on Intel cpus
1557     }
1558     if (cpu_family() == 6 || cpu_family() == 15) {
1559       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1560         // Use it on all Intel cpus starting from PentiumPro
1561         UseAddressNop = true;
1562       }
1563     }
1564     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1565       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1566     }
1567     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1568       if (supports_sse3()) {
1569         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1570       } else {
1571         UseXmmRegToRegMoveAll = false;
1572       }
1573     }
1574     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1575 #ifdef COMPILER2
1576       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1577         // For new Intel cpus do the next optimization:
1578         // don't align the beginning of a loop if there are enough instructions
1579         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1580         // in current fetch line (OptoLoopAlignment) or the padding
1581         // is big (> MaxLoopPad).
1582         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1583         // generated NOP instructions. 11 is the largest size of one
1584         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1585         MaxLoopPad = 11;
1586       }
1587 #endif // COMPILER2
1588 
1589       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1590         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1591       }
1592       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1593         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1594           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1595         }
1596       }
1597       if (supports_sse4_2()) {
1598         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1599           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1600         }
1601       } else {
1602         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1603           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1604         }
1605         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1606       }
1607     }
1608     if (is_atom_family() || is_knights_family()) {
1609 #ifdef COMPILER2
1610       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1611         OptoScheduling = true;
1612       }
1613 #endif
1614       if (supports_sse4_2()) { // Silvermont
1615         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1616           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1617         }
1618       }
1619       if (FLAG_IS_DEFAULT(UseIncDec)) {
1620         FLAG_SET_DEFAULT(UseIncDec, false);
1621       }
1622     }
1623     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1624       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1625     }
1626 #ifdef COMPILER2
1627     if (UseAVX > 2) {
1628       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1629           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1630            ArrayOperationPartialInlineSize != 0 &&
1631            ArrayOperationPartialInlineSize != 16 &&
1632            ArrayOperationPartialInlineSize != 32 &&
1633            ArrayOperationPartialInlineSize != 64)) {
1634         int inline_size = 0;
1635         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1636           inline_size = 64;
1637         } else if (MaxVectorSize >= 32) {
1638           inline_size = 32;
1639         } else if (MaxVectorSize >= 16) {
1640           inline_size = 16;
1641         }
1642         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1643           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1644         }
1645         ArrayOperationPartialInlineSize = inline_size;
1646       }
1647 
1648       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1649         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1650         if (ArrayOperationPartialInlineSize) {
1651           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1652         } else {
1653           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1654         }
1655       }
1656     }
1657 #endif
1658   }
1659 
1660 #ifdef COMPILER2
1661   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1662     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1663       OptimizeFill = false;
1664     }
1665   }
1666 #endif
1667 
1668 #ifdef _LP64
1669   if (UseSSE42Intrinsics) {
1670     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1671       UseVectorizedMismatchIntrinsic = true;
1672     }
1673   } else if (UseVectorizedMismatchIntrinsic) {
1674     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1675       warning("vectorizedMismatch intrinsics are not available on this CPU");
1676     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1677   }
1678   if (UseAVX >= 2) {
1679     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1680   } else if (UseVectorizedHashCodeIntrinsic) {
1681     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1682       warning("vectorizedHashCode intrinsics are not available on this CPU");
1683     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1684   }
1685 #else
1686   if (UseVectorizedMismatchIntrinsic) {
1687     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1688       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1689     }
1690     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1691   }
1692   if (UseVectorizedHashCodeIntrinsic) {
1693     if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1694       warning("vectorizedHashCode intrinsic is not available in 32-bit VM");
1695     }
1696     FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1697   }
1698 #endif // _LP64
1699 
1700   // Use count leading zeros count instruction if available.
1701   if (supports_lzcnt()) {
1702     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1703       UseCountLeadingZerosInstruction = true;
1704     }
1705    } else if (UseCountLeadingZerosInstruction) {
1706     warning("lzcnt instruction is not available on this CPU");
1707     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1708   }
1709 
1710   // Use count trailing zeros instruction if available
1711   if (supports_bmi1()) {
1712     // tzcnt does not require VEX prefix
1713     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1714       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1715         // Don't use tzcnt if BMI1 is switched off on command line.
1716         UseCountTrailingZerosInstruction = false;
1717       } else {
1718         UseCountTrailingZerosInstruction = true;
1719       }
1720     }
1721   } else if (UseCountTrailingZerosInstruction) {
1722     warning("tzcnt instruction is not available on this CPU");
1723     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1724   }
1725 
1726   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1727   // VEX prefix is generated only when AVX > 0.
1728   if (supports_bmi1() && supports_avx()) {
1729     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1730       UseBMI1Instructions = true;
1731     }
1732   } else if (UseBMI1Instructions) {
1733     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1734     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1735   }
1736 
1737   if (supports_bmi2() && supports_avx()) {
1738     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1739       UseBMI2Instructions = true;
1740     }
1741   } else if (UseBMI2Instructions) {
1742     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1743     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1744   }
1745 
1746   // Use population count instruction if available.
1747   if (supports_popcnt()) {
1748     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1749       UsePopCountInstruction = true;
1750     }
1751   } else if (UsePopCountInstruction) {
1752     warning("POPCNT instruction is not available on this CPU");
1753     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1754   }
1755 
1756   // Use fast-string operations if available.
1757   if (supports_erms()) {
1758     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1759       UseFastStosb = true;
1760     }
1761   } else if (UseFastStosb) {
1762     warning("fast-string operations are not available on this CPU");
1763     FLAG_SET_DEFAULT(UseFastStosb, false);
1764   }
1765 
1766   // For AMD Processors use XMM/YMM MOVDQU instructions
1767   // for Object Initialization as default
1768   if (is_amd() && cpu_family() >= 0x19) {
1769     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1770       UseFastStosb = false;
1771     }
1772   }
1773 
1774 #ifdef COMPILER2
1775   if (is_intel() && MaxVectorSize > 16) {
1776     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1777       UseFastStosb = false;
1778     }
1779   }
1780 #endif
1781 
1782   // Use XMM/YMM MOVDQU instruction for Object Initialization
1783   if (UseSSE >= 2 && UseUnalignedLoadStores) {
1784     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1785       UseXMMForObjInit = true;
1786     }
1787   } else if (UseXMMForObjInit) {
1788     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1789     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1790   }
1791 
1792 #ifdef COMPILER2
1793   if (FLAG_IS_DEFAULT(AlignVector)) {
1794     // Modern processors allow misaligned memory operations for vectors.
1795     AlignVector = !UseUnalignedLoadStores;
1796   }
1797 #endif // COMPILER2
1798 
1799   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1800     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1801       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1802     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1803       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1804     }
1805   }
1806 
1807   // Allocation prefetch settings
1808   intx cache_line_size = prefetch_data_size();
1809   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1810       (cache_line_size > AllocatePrefetchStepSize)) {
1811     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1812   }
1813 
1814   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1815     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1816     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1817       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1818     }
1819     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1820   }
1821 
1822   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1823     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1824     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1825   }
1826 
1827   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1828     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1829         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1830       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1831     }
1832 #ifdef COMPILER2
1833     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1834       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1835     }
1836 #endif
1837   }
1838 
1839   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1840 #ifdef COMPILER2
1841     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1842       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1843     }
1844 #endif
1845   }
1846 
1847 #ifdef _LP64
1848   // Prefetch settings
1849 
1850   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1851   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1852   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1853   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1854 
1855   // gc copy/scan is disabled if prefetchw isn't supported, because
1856   // Prefetch::write emits an inlined prefetchw on Linux.
1857   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1858   // The used prefetcht0 instruction works for both amd64 and em64t.
1859 
1860   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1861     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1862   }
1863   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1864     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1865   }
1866 #endif
1867 
1868   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1869      (cache_line_size > ContendedPaddingWidth))
1870      ContendedPaddingWidth = cache_line_size;
1871 
1872   // This machine allows unaligned memory accesses
1873   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1874     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1875   }
1876 
1877 #ifndef PRODUCT
1878   if (log_is_enabled(Info, os, cpu)) {
1879     LogStream ls(Log(os, cpu)::info());
1880     outputStream* log = &ls;
1881     log->print_cr("Logical CPUs per core: %u",
1882                   logical_processors_per_package());
1883     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1884     log->print("UseSSE=%d", UseSSE);
1885     if (UseAVX > 0) {
1886       log->print("  UseAVX=%d", UseAVX);
1887     }
1888     if (UseAES) {
1889       log->print("  UseAES=1");
1890     }
1891 #ifdef COMPILER2
1892     if (MaxVectorSize > 0) {
1893       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1894     }
1895 #endif
1896     log->cr();
1897     log->print("Allocation");
1898     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1899       log->print_cr(": no prefetching");
1900     } else {
1901       log->print(" prefetching: ");
1902       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1903         log->print("PREFETCHW");
1904       } else if (UseSSE >= 1) {
1905         if (AllocatePrefetchInstr == 0) {
1906           log->print("PREFETCHNTA");
1907         } else if (AllocatePrefetchInstr == 1) {
1908           log->print("PREFETCHT0");
1909         } else if (AllocatePrefetchInstr == 2) {
1910           log->print("PREFETCHT2");
1911         } else if (AllocatePrefetchInstr == 3) {
1912           log->print("PREFETCHW");
1913         }
1914       }
1915       if (AllocatePrefetchLines > 1) {
1916         log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1917       } else {
1918         log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1919       }
1920     }
1921 
1922     if (PrefetchCopyIntervalInBytes > 0) {
1923       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1924     }
1925     if (PrefetchScanIntervalInBytes > 0) {
1926       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1927     }
1928     if (ContendedPaddingWidth > 0) {
1929       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1930     }
1931   }
1932 #endif // !PRODUCT
1933   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1934       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1935   }
1936   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1937       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1938   }
1939 }
1940 
1941 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1942   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1943   if (vrt == XenHVM) {
1944     st->print_cr("Xen hardware-assisted virtualization detected");
1945   } else if (vrt == KVM) {
1946     st->print_cr("KVM virtualization detected");
1947   } else if (vrt == VMWare) {
1948     st->print_cr("VMWare virtualization detected");
1949     VirtualizationSupport::print_virtualization_info(st);
1950   } else if (vrt == HyperV) {
1951     st->print_cr("Hyper-V virtualization detected");
1952   } else if (vrt == HyperVRole) {
1953     st->print_cr("Hyper-V role detected");
1954   }
1955 }
1956 
1957 bool VM_Version::compute_has_intel_jcc_erratum() {
1958   if (!is_intel_family_core()) {
1959     // Only Intel CPUs are affected.
1960     return false;
1961   }
1962   // The following table of affected CPUs is based on the following document released by Intel:
1963   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1964   switch (_model) {
1965   case 0x8E:
1966     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1967     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1968     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1969     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1970     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1971     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1972     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1973     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1974     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1975     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1976   case 0x4E:
1977     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1978     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1979     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1980     return _stepping == 0x3;
1981   case 0x55:
1982     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1983     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1984     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1985     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1986     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1987     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1988     return _stepping == 0x4 || _stepping == 0x7;
1989   case 0x5E:
1990     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1991     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1992     return _stepping == 0x3;
1993   case 0x9E:
1994     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1995     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1996     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1997     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1998     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1999     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2000     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2001     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2002     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2003     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2004     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2005     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2006     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2007     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2008     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2009   case 0xA5:
2010     // Not in Intel documentation.
2011     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2012     return true;
2013   case 0xA6:
2014     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2015     return _stepping == 0x0;
2016   case 0xAE:
2017     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2018     return _stepping == 0xA;
2019   default:
2020     // If we are running on another intel machine not recognized in the table, we are okay.
2021     return false;
2022   }
2023 }
2024 
2025 // On Xen, the cpuid instruction returns
2026 //  eax / registers[0]: Version of Xen
2027 //  ebx / registers[1]: chars 'XenV'
2028 //  ecx / registers[2]: chars 'MMXe'
2029 //  edx / registers[3]: chars 'nVMM'
2030 //
2031 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2032 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2033 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2034 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
2035 //
2036 // more information :
2037 // https://kb.vmware.com/s/article/1009458
2038 //
2039 void VM_Version::check_virtualizations() {
2040   uint32_t registers[4] = {0};
2041   char signature[13] = {0};
2042 
2043   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2044   // from 0x40000000 until 0x40010000.
2045   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2046   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2047     detect_virt_stub(leaf, registers);
2048     memcpy(signature, &registers[1], 12);
2049 
2050     if (strncmp("VMwareVMware", signature, 12) == 0) {
2051       Abstract_VM_Version::_detected_virtualization = VMWare;
2052       // check for extended metrics from guestlib
2053       VirtualizationSupport::initialize();
2054     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2055       Abstract_VM_Version::_detected_virtualization = HyperV;
2056 #ifdef _WINDOWS
2057       // CPUID leaf 0x40000007 is available to the root partition only.
2058       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2059       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2060       detect_virt_stub(0x40000007, registers);
2061       if ((registers[0] != 0x0) ||
2062           (registers[1] != 0x0) ||
2063           (registers[2] != 0x0) ||
2064           (registers[3] != 0x0)) {
2065         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2066       }
2067 #endif
2068     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2069       Abstract_VM_Version::_detected_virtualization = KVM;
2070     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2071       Abstract_VM_Version::_detected_virtualization = XenHVM;
2072     }
2073   }
2074 }
2075 
2076 #ifdef COMPILER2
2077 // Determine if it's running on Cascade Lake using default options.
2078 bool VM_Version::is_default_intel_cascade_lake() {
2079   return FLAG_IS_DEFAULT(UseAVX) &&
2080          FLAG_IS_DEFAULT(MaxVectorSize) &&
2081          UseAVX > 2 &&
2082          is_intel_cascade_lake();
2083 }
2084 #endif
2085 
2086 bool VM_Version::is_intel_cascade_lake() {
2087   return is_intel_skylake() && _stepping >= 5;
2088 }
2089 
2090 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2091 // for implementing the array copy and clear operations.
2092 // The Intel platforms that supports the serialize instruction
2093 // has improved implementation of 64-byte load/stores and so the default
2094 // threshold is set to 0 for these platforms.
2095 int VM_Version::avx3_threshold() {
2096   return (is_intel_family_core() &&
2097           supports_serialize() &&
2098           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2099 }
2100 
2101 static bool _vm_version_initialized = false;
2102 
2103 void VM_Version::initialize() {
2104   ResourceMark rm;
2105   // Making this stub must be FIRST use of assembler
2106   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2107   if (stub_blob == nullptr) {
2108     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2109   }
2110   CodeBuffer c(stub_blob);
2111   VM_Version_StubGenerator g(&c);
2112 
2113   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2114                                      g.generate_get_cpu_info());
2115   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2116                                      g.generate_detect_virt());
2117 
2118   get_processor_features();
2119 
2120   LP64_ONLY(Assembler::precompute_instructions();)
2121 
2122   if (VM_Version::supports_hv()) { // Supports hypervisor
2123     check_virtualizations();
2124   }
2125   _vm_version_initialized = true;
2126 }
2127 
2128 typedef enum {
2129    CPU_FAMILY_8086_8088  = 0,
2130    CPU_FAMILY_INTEL_286  = 2,
2131    CPU_FAMILY_INTEL_386  = 3,
2132    CPU_FAMILY_INTEL_486  = 4,
2133    CPU_FAMILY_PENTIUM    = 5,
2134    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2135    CPU_FAMILY_PENTIUM_4  = 0xF
2136 } FamilyFlag;
2137 
2138 typedef enum {
2139   RDTSCP_FLAG  = 0x08000000, // bit 27
2140   INTEL64_FLAG = 0x20000000  // bit 29
2141 } _featureExtendedEdxFlag;
2142 
2143 typedef enum {
2144    FPU_FLAG     = 0x00000001,
2145    VME_FLAG     = 0x00000002,
2146    DE_FLAG      = 0x00000004,
2147    PSE_FLAG     = 0x00000008,
2148    TSC_FLAG     = 0x00000010,
2149    MSR_FLAG     = 0x00000020,
2150    PAE_FLAG     = 0x00000040,
2151    MCE_FLAG     = 0x00000080,
2152    CX8_FLAG     = 0x00000100,
2153    APIC_FLAG    = 0x00000200,
2154    SEP_FLAG     = 0x00000800,
2155    MTRR_FLAG    = 0x00001000,
2156    PGE_FLAG     = 0x00002000,
2157    MCA_FLAG     = 0x00004000,
2158    CMOV_FLAG    = 0x00008000,
2159    PAT_FLAG     = 0x00010000,
2160    PSE36_FLAG   = 0x00020000,
2161    PSNUM_FLAG   = 0x00040000,
2162    CLFLUSH_FLAG = 0x00080000,
2163    DTS_FLAG     = 0x00200000,
2164    ACPI_FLAG    = 0x00400000,
2165    MMX_FLAG     = 0x00800000,
2166    FXSR_FLAG    = 0x01000000,
2167    SSE_FLAG     = 0x02000000,
2168    SSE2_FLAG    = 0x04000000,
2169    SS_FLAG      = 0x08000000,
2170    HTT_FLAG     = 0x10000000,
2171    TM_FLAG      = 0x20000000
2172 } FeatureEdxFlag;
2173 
2174 static BufferBlob* cpuid_brand_string_stub_blob;
2175 static const int   cpuid_brand_string_stub_size = 550;
2176 
2177 extern "C" {
2178   typedef void (*getCPUIDBrandString_stub_t)(void*);
2179 }
2180 
2181 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
2182 
2183 // VM_Version statics
2184 enum {
2185   ExtendedFamilyIdLength_INTEL = 16,
2186   ExtendedFamilyIdLength_AMD   = 24
2187 };
2188 
2189 const size_t VENDOR_LENGTH = 13;
2190 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2191 static char* _cpu_brand_string = nullptr;
2192 static int64_t _max_qualified_cpu_frequency = 0;
2193 
2194 static int _no_of_threads = 0;
2195 static int _no_of_cores = 0;
2196 
2197 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2198   "8086/8088",
2199   "",
2200   "286",
2201   "386",
2202   "486",
2203   "Pentium",
2204   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2205   "",
2206   "",
2207   "",
2208   "",
2209   "",
2210   "",
2211   "",
2212   "",
2213   "Pentium 4"
2214 };
2215 
2216 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2217   "",
2218   "",
2219   "",
2220   "",
2221   "5x86",
2222   "K5/K6",
2223   "Athlon/AthlonXP",
2224   "",
2225   "",
2226   "",
2227   "",
2228   "",
2229   "",
2230   "",
2231   "",
2232   "Opteron/Athlon64",
2233   "Opteron QC/Phenom",  // Barcelona et.al.
2234   "",
2235   "",
2236   "",
2237   "",
2238   "",
2239   "",
2240   "Zen"
2241 };
2242 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2243 // September 2013, Vol 3C Table 35-1
2244 const char* const _model_id_pentium_pro[] = {
2245   "",
2246   "Pentium Pro",
2247   "",
2248   "Pentium II model 3",
2249   "",
2250   "Pentium II model 5/Xeon/Celeron",
2251   "Celeron",
2252   "Pentium III/Pentium III Xeon",
2253   "Pentium III/Pentium III Xeon",
2254   "Pentium M model 9",    // Yonah
2255   "Pentium III, model A",
2256   "Pentium III, model B",
2257   "",
2258   "Pentium M model D",    // Dothan
2259   "",
2260   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2261   "",
2262   "",
2263   "",
2264   "",
2265   "",
2266   "",
2267   "Celeron",              // 0x16 Celeron 65nm
2268   "Core 2",               // 0x17 Penryn / Harpertown
2269   "",
2270   "",
2271   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2272   "Atom",                 // 0x1B Z5xx series Silverthorn
2273   "",
2274   "Core 2",               // 0x1D Dunnington (6-core)
2275   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2276   "",
2277   "",
2278   "",
2279   "",
2280   "",
2281   "",
2282   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2283   "",
2284   "",
2285   "",                     // 0x28
2286   "",
2287   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2288   "",
2289   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2290   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2291   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2292   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2293   "",
2294   "",
2295   "",
2296   "",
2297   "",
2298   "",
2299   "",
2300   "",
2301   "",
2302   "",
2303   "Ivy Bridge",           // 0x3a
2304   "",
2305   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2306   "",                     // 0x3d "Next Generation Intel Core Processor"
2307   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2308   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2309   "",
2310   "",
2311   "",
2312   "",
2313   "",
2314   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2315   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2316   nullptr
2317 };
2318 
2319 /* Brand ID is for back compatibility
2320  * Newer CPUs uses the extended brand string */
2321 const char* const _brand_id[] = {
2322   "",
2323   "Celeron processor",
2324   "Pentium III processor",
2325   "Intel Pentium III Xeon processor",
2326   "",
2327   "",
2328   "",
2329   "",
2330   "Intel Pentium 4 processor",
2331   nullptr
2332 };
2333 
2334 
2335 const char* const _feature_edx_id[] = {
2336   "On-Chip FPU",
2337   "Virtual Mode Extensions",
2338   "Debugging Extensions",
2339   "Page Size Extensions",
2340   "Time Stamp Counter",
2341   "Model Specific Registers",
2342   "Physical Address Extension",
2343   "Machine Check Exceptions",
2344   "CMPXCHG8B Instruction",
2345   "On-Chip APIC",
2346   "",
2347   "Fast System Call",
2348   "Memory Type Range Registers",
2349   "Page Global Enable",
2350   "Machine Check Architecture",
2351   "Conditional Mov Instruction",
2352   "Page Attribute Table",
2353   "36-bit Page Size Extension",
2354   "Processor Serial Number",
2355   "CLFLUSH Instruction",
2356   "",
2357   "Debug Trace Store feature",
2358   "ACPI registers in MSR space",
2359   "Intel Architecture MMX Technology",
2360   "Fast Float Point Save and Restore",
2361   "Streaming SIMD extensions",
2362   "Streaming SIMD extensions 2",
2363   "Self-Snoop",
2364   "Hyper Threading",
2365   "Thermal Monitor",
2366   "",
2367   "Pending Break Enable"
2368 };
2369 
2370 const char* const _feature_extended_edx_id[] = {
2371   "",
2372   "",
2373   "",
2374   "",
2375   "",
2376   "",
2377   "",
2378   "",
2379   "",
2380   "",
2381   "",
2382   "SYSCALL/SYSRET",
2383   "",
2384   "",
2385   "",
2386   "",
2387   "",
2388   "",
2389   "",
2390   "",
2391   "Execute Disable Bit",
2392   "",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "RDTSCP",
2399   "",
2400   "Intel 64 Architecture",
2401   "",
2402   ""
2403 };
2404 
2405 const char* const _feature_ecx_id[] = {
2406   "Streaming SIMD Extensions 3",
2407   "PCLMULQDQ",
2408   "64-bit DS Area",
2409   "MONITOR/MWAIT instructions",
2410   "CPL Qualified Debug Store",
2411   "Virtual Machine Extensions",
2412   "Safer Mode Extensions",
2413   "Enhanced Intel SpeedStep technology",
2414   "Thermal Monitor 2",
2415   "Supplemental Streaming SIMD Extensions 3",
2416   "L1 Context ID",
2417   "",
2418   "Fused Multiply-Add",
2419   "CMPXCHG16B",
2420   "xTPR Update Control",
2421   "Perfmon and Debug Capability",
2422   "",
2423   "Process-context identifiers",
2424   "Direct Cache Access",
2425   "Streaming SIMD extensions 4.1",
2426   "Streaming SIMD extensions 4.2",
2427   "x2APIC",
2428   "MOVBE",
2429   "Popcount instruction",
2430   "TSC-Deadline",
2431   "AESNI",
2432   "XSAVE",
2433   "OSXSAVE",
2434   "AVX",
2435   "F16C",
2436   "RDRAND",
2437   ""
2438 };
2439 
2440 const char* const _feature_extended_ecx_id[] = {
2441   "LAHF/SAHF instruction support",
2442   "Core multi-processor legacy mode",
2443   "",
2444   "",
2445   "",
2446   "Advanced Bit Manipulations: LZCNT",
2447   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2448   "Misaligned SSE mode",
2449   "",
2450   "",
2451   "",
2452   "",
2453   "",
2454   "",
2455   "",
2456   "",
2457   "",
2458   "",
2459   "",
2460   "",
2461   "",
2462   "",
2463   "",
2464   "",
2465   "",
2466   "",
2467   "",
2468   "",
2469   "",
2470   "",
2471   "",
2472   ""
2473 };
2474 
2475 void VM_Version::initialize_tsc(void) {
2476   ResourceMark rm;
2477 
2478   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2479   if (cpuid_brand_string_stub_blob == nullptr) {
2480     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2481   }
2482   CodeBuffer c(cpuid_brand_string_stub_blob);
2483   VM_Version_StubGenerator g(&c);
2484   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2485                                    g.generate_getCPUIDBrandString());
2486 }
2487 
2488 const char* VM_Version::cpu_model_description(void) {
2489   uint32_t cpu_family = extended_cpu_family();
2490   uint32_t cpu_model = extended_cpu_model();
2491   const char* model = nullptr;
2492 
2493   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2494     for (uint32_t i = 0; i <= cpu_model; i++) {
2495       model = _model_id_pentium_pro[i];
2496       if (model == nullptr) {
2497         break;
2498       }
2499     }
2500   }
2501   return model;
2502 }
2503 
2504 const char* VM_Version::cpu_brand_string(void) {
2505   if (_cpu_brand_string == nullptr) {
2506     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2507     if (nullptr == _cpu_brand_string) {
2508       return nullptr;
2509     }
2510     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2511     if (ret_val != OS_OK) {
2512       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2513       _cpu_brand_string = nullptr;
2514     }
2515   }
2516   return _cpu_brand_string;
2517 }
2518 
2519 const char* VM_Version::cpu_brand(void) {
2520   const char*  brand  = nullptr;
2521 
2522   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2523     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2524     brand = _brand_id[0];
2525     for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2526       brand = _brand_id[i];
2527     }
2528   }
2529   return brand;
2530 }
2531 
2532 bool VM_Version::cpu_is_em64t(void) {
2533   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2534 }
2535 
2536 bool VM_Version::is_netburst(void) {
2537   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2538 }
2539 
2540 bool VM_Version::supports_tscinv_ext(void) {
2541   if (!supports_tscinv_bit()) {
2542     return false;
2543   }
2544 
2545   if (is_intel()) {
2546     return true;
2547   }
2548 
2549   if (is_amd()) {
2550     return !is_amd_Barcelona();
2551   }
2552 
2553   if (is_hygon()) {
2554     return true;
2555   }
2556 
2557   return false;
2558 }
2559 
2560 void VM_Version::resolve_cpu_information_details(void) {
2561 
2562   // in future we want to base this information on proper cpu
2563   // and cache topology enumeration such as:
2564   // Intel 64 Architecture Processor Topology Enumeration
2565   // which supports system cpu and cache topology enumeration
2566   // either using 2xAPICIDs or initial APICIDs
2567 
2568   // currently only rough cpu information estimates
2569   // which will not necessarily reflect the exact configuration of the system
2570 
2571   // this is the number of logical hardware threads
2572   // visible to the operating system
2573   _no_of_threads = os::processor_count();
2574 
2575   // find out number of threads per cpu package
2576   int threads_per_package = threads_per_core() * cores_per_cpu();
2577 
2578   // use amount of threads visible to the process in order to guess number of sockets
2579   _no_of_sockets = _no_of_threads / threads_per_package;
2580 
2581   // process might only see a subset of the total number of threads
2582   // from a single processor package. Virtualization/resource management for example.
2583   // If so then just write a hard 1 as num of pkgs.
2584   if (0 == _no_of_sockets) {
2585     _no_of_sockets = 1;
2586   }
2587 
2588   // estimate the number of cores
2589   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2590 }
2591 
2592 
2593 const char* VM_Version::cpu_family_description(void) {
2594   int cpu_family_id = extended_cpu_family();
2595   if (is_amd()) {
2596     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2597       return _family_id_amd[cpu_family_id];
2598     }
2599   }
2600   if (is_intel()) {
2601     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2602       return cpu_model_description();
2603     }
2604     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2605       return _family_id_intel[cpu_family_id];
2606     }
2607   }
2608   if (is_hygon()) {
2609     return "Dhyana";
2610   }
2611   return "Unknown x86";
2612 }
2613 
2614 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2615   assert(buf != nullptr, "buffer is null!");
2616   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2617 
2618   const char* cpu_type = nullptr;
2619   const char* x64 = nullptr;
2620 
2621   if (is_intel()) {
2622     cpu_type = "Intel";
2623     x64 = cpu_is_em64t() ? " Intel64" : "";
2624   } else if (is_amd()) {
2625     cpu_type = "AMD";
2626     x64 = cpu_is_em64t() ? " AMD64" : "";
2627   } else if (is_hygon()) {
2628     cpu_type = "Hygon";
2629     x64 = cpu_is_em64t() ? " AMD64" : "";
2630   } else {
2631     cpu_type = "Unknown x86";
2632     x64 = cpu_is_em64t() ? " x86_64" : "";
2633   }
2634 
2635   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2636     cpu_type,
2637     cpu_family_description(),
2638     supports_ht() ? " (HT)" : "",
2639     supports_sse3() ? " SSE3" : "",
2640     supports_ssse3() ? " SSSE3" : "",
2641     supports_sse4_1() ? " SSE4.1" : "",
2642     supports_sse4_2() ? " SSE4.2" : "",
2643     supports_sse4a() ? " SSE4A" : "",
2644     is_netburst() ? " Netburst" : "",
2645     is_intel_family_core() ? " Core" : "",
2646     x64);
2647 
2648   return OS_OK;
2649 }
2650 
2651 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2652   assert(buf != nullptr, "buffer is null!");
2653   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2654   assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2655 
2656   // invoke newly generated asm code to fetch CPU Brand String
2657   getCPUIDBrandString_stub(&_cpuid_info);
2658 
2659   // fetch results into buffer
2660   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2661   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2662   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2663   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2664   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2665   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2666   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2667   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2668   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2669   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2670   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2671   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2672 
2673   return OS_OK;
2674 }
2675 
2676 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2677   guarantee(buf != nullptr, "buffer is null!");
2678   guarantee(buf_len > 0, "buffer len not enough!");
2679 
2680   unsigned int flag = 0;
2681   unsigned int fi = 0;
2682   size_t       written = 0;
2683   const char*  prefix = "";
2684 
2685 #define WRITE_TO_BUF(string)                                                          \
2686   {                                                                                   \
2687     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2688     if (res < 0) {                                                                    \
2689       return buf_len - 1;                                                             \
2690     }                                                                                 \
2691     written += res;                                                                   \
2692     if (prefix[0] == '\0') {                                                          \
2693       prefix = ", ";                                                                  \
2694     }                                                                                 \
2695   }
2696 
2697   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2698     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2699       continue; /* no hyperthreading */
2700     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2701       continue; /* no fast system call */
2702     }
2703     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2704       WRITE_TO_BUF(_feature_edx_id[fi]);
2705     }
2706   }
2707 
2708   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2709     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2710       WRITE_TO_BUF(_feature_ecx_id[fi]);
2711     }
2712   }
2713 
2714   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2715     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2716       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2717     }
2718   }
2719 
2720   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2721     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2722       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2723     }
2724   }
2725 
2726   if (supports_tscinv_bit()) {
2727       WRITE_TO_BUF("Invariant TSC");
2728   }
2729 
2730   return written;
2731 }
2732 
2733 /**
2734  * Write a detailed description of the cpu to a given buffer, including
2735  * feature set.
2736  */
2737 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2738   assert(buf != nullptr, "buffer is null!");
2739   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2740 
2741   static const char* unknown = "<unknown>";
2742   char               vendor_id[VENDOR_LENGTH];
2743   const char*        family = nullptr;
2744   const char*        model = nullptr;
2745   const char*        brand = nullptr;
2746   int                outputLen = 0;
2747 
2748   family = cpu_family_description();
2749   if (family == nullptr) {
2750     family = unknown;
2751   }
2752 
2753   model = cpu_model_description();
2754   if (model == nullptr) {
2755     model = unknown;
2756   }
2757 
2758   brand = cpu_brand_string();
2759 
2760   if (brand == nullptr) {
2761     brand = cpu_brand();
2762     if (brand == nullptr) {
2763       brand = unknown;
2764     }
2765   }
2766 
2767   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2768   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2769   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2770   vendor_id[VENDOR_LENGTH-1] = '\0';
2771 
2772   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2773     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2774     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2775     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2776     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2777     "Supports: ",
2778     brand,
2779     vendor_id,
2780     family,
2781     extended_cpu_family(),
2782     model,
2783     extended_cpu_model(),
2784     cpu_stepping(),
2785     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2786     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2787     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2788     _cpuid_info.std_cpuid1_eax.value,
2789     _cpuid_info.std_cpuid1_ebx.value,
2790     _cpuid_info.std_cpuid1_ecx.value,
2791     _cpuid_info.std_cpuid1_edx.value,
2792     _cpuid_info.ext_cpuid1_eax,
2793     _cpuid_info.ext_cpuid1_ebx,
2794     _cpuid_info.ext_cpuid1_ecx,
2795     _cpuid_info.ext_cpuid1_edx);
2796 
2797   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2798     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2799     return OS_ERR;
2800   }
2801 
2802   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2803 
2804   return OS_OK;
2805 }
2806 
2807 
2808 // Fill in Abstract_VM_Version statics
2809 void VM_Version::initialize_cpu_information() {
2810   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2811   assert(!_initialized, "shouldn't be initialized yet");
2812   resolve_cpu_information_details();
2813 
2814   // initialize cpu_name and cpu_desc
2815   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2816   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2817   _initialized = true;
2818 }
2819 
2820 /**
2821  *  For information about extracting the frequency from the cpu brand string, please see:
2822  *
2823  *    Intel Processor Identification and the CPUID Instruction
2824  *    Application Note 485
2825  *    May 2012
2826  *
2827  * The return value is the frequency in Hz.
2828  */
2829 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2830   const char* const brand_string = cpu_brand_string();
2831   if (brand_string == nullptr) {
2832     return 0;
2833   }
2834   const int64_t MEGA = 1000000;
2835   int64_t multiplier = 0;
2836   int64_t frequency = 0;
2837   uint8_t idx = 0;
2838   // The brand string buffer is at most 48 bytes.
2839   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2840   for (; idx < 48-2; ++idx) {
2841     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2842     // Search brand string for "yHz" where y is M, G, or T.
2843     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2844       if (brand_string[idx] == 'M') {
2845         multiplier = MEGA;
2846       } else if (brand_string[idx] == 'G') {
2847         multiplier = MEGA * 1000;
2848       } else if (brand_string[idx] == 'T') {
2849         multiplier = MEGA * MEGA;
2850       }
2851       break;
2852     }
2853   }
2854   if (multiplier > 0) {
2855     // Compute frequency (in Hz) from brand string.
2856     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2857       frequency =  (brand_string[idx-4] - '0') * multiplier;
2858       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2859       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2860     } else { // format is "xxxx"
2861       frequency =  (brand_string[idx-4] - '0') * 1000;
2862       frequency += (brand_string[idx-3] - '0') * 100;
2863       frequency += (brand_string[idx-2] - '0') * 10;
2864       frequency += (brand_string[idx-1] - '0');
2865       frequency *= multiplier;
2866     }
2867   }
2868   return frequency;
2869 }
2870 
2871 
2872 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2873   if (_max_qualified_cpu_frequency == 0) {
2874     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2875   }
2876   return _max_qualified_cpu_frequency;
2877 }
2878 
2879 uint64_t VM_Version::feature_flags() {
2880   uint64_t result = 0;
2881   if (_cpuid_info.std_cpuid1_edx.bits.cmpxchg8 != 0)
2882     result |= CPU_CX8;
2883   if (_cpuid_info.std_cpuid1_edx.bits.cmov != 0)
2884     result |= CPU_CMOV;
2885   if (_cpuid_info.std_cpuid1_edx.bits.clflush != 0)
2886     result |= CPU_FLUSH;
2887 #ifdef _LP64
2888   // clflush should always be available on x86_64
2889   // if not we are in real trouble because we rely on it
2890   // to flush the code cache.
2891   assert ((result & CPU_FLUSH) != 0, "clflush should be available");
2892 #endif
2893   if (_cpuid_info.std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2894       _cpuid_info.ext_cpuid1_edx.bits.fxsr != 0))
2895     result |= CPU_FXSR;
2896   // HT flag is set for multi-core processors also.
2897   if (threads_per_core() > 1)
2898     result |= CPU_HT;
2899   if (_cpuid_info.std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2900       _cpuid_info.ext_cpuid1_edx.bits.mmx != 0))
2901     result |= CPU_MMX;
2902   if (_cpuid_info.std_cpuid1_edx.bits.sse != 0)
2903     result |= CPU_SSE;
2904   if (_cpuid_info.std_cpuid1_edx.bits.sse2 != 0)
2905     result |= CPU_SSE2;
2906   if (_cpuid_info.std_cpuid1_ecx.bits.sse3 != 0)
2907     result |= CPU_SSE3;
2908   if (_cpuid_info.std_cpuid1_ecx.bits.ssse3 != 0)
2909     result |= CPU_SSSE3;
2910   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_1 != 0)
2911     result |= CPU_SSE4_1;
2912   if (_cpuid_info.std_cpuid1_ecx.bits.sse4_2 != 0)
2913     result |= CPU_SSE4_2;
2914   if (_cpuid_info.std_cpuid1_ecx.bits.popcnt != 0)
2915     result |= CPU_POPCNT;
2916   if (_cpuid_info.std_cpuid1_ecx.bits.avx != 0 &&
2917       _cpuid_info.std_cpuid1_ecx.bits.osxsave != 0 &&
2918       _cpuid_info.xem_xcr0_eax.bits.sse != 0 &&
2919       _cpuid_info.xem_xcr0_eax.bits.ymm != 0) {
2920     result |= CPU_AVX;
2921     result |= CPU_VZEROUPPER;
2922     if (_cpuid_info.std_cpuid1_ecx.bits.f16c != 0)
2923       result |= CPU_F16C;
2924     if (_cpuid_info.sef_cpuid7_ebx.bits.avx2 != 0)
2925       result |= CPU_AVX2;
2926     if (_cpuid_info.sef_cpuid7_ebx.bits.avx512f != 0 &&
2927         _cpuid_info.xem_xcr0_eax.bits.opmask != 0 &&
2928         _cpuid_info.xem_xcr0_eax.bits.zmm512 != 0 &&
2929         _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
2930       result |= CPU_AVX512F;
2931       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
2932         result |= CPU_AVX512CD;
2933       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
2934         result |= CPU_AVX512DQ;
2935       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512ifma != 0)
2936         result |= CPU_AVX512_IFMA;
2937       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
2938         result |= CPU_AVX512PF;
2939       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
2940         result |= CPU_AVX512ER;
2941       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
2942         result |= CPU_AVX512BW;
2943       if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
2944         result |= CPU_AVX512VL;
2945       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2946         result |= CPU_AVX512_VPOPCNTDQ;
2947       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2948         result |= CPU_AVX512_VPCLMULQDQ;
2949       if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
2950         result |= CPU_AVX512_VAES;
2951       if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
2952         result |= CPU_GFNI;
2953       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
2954         result |= CPU_AVX512_VNNI;
2955       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2956         result |= CPU_AVX512_BITALG;
2957       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2958         result |= CPU_AVX512_VBMI;
2959       if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2960         result |= CPU_AVX512_VBMI2;
2961     }
2962   }
2963   if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
2964     result |= CPU_HV;
2965   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
2966     result |= CPU_BMI1;
2967   if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
2968     result |= CPU_TSC;
2969   if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
2970     result |= CPU_TSCINV_BIT;
2971   if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
2972     result |= CPU_AES;
2973   if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
2974     result |= CPU_ERMS;
2975   if (_cpuid_info.sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2976     result |= CPU_FSRM;
2977   if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
2978     result |= CPU_CLMUL;
2979   if (_cpuid_info.sef_cpuid7_ebx.bits.rtm != 0)
2980     result |= CPU_RTM;
2981   if (_cpuid_info.sef_cpuid7_ebx.bits.adx != 0)
2982      result |= CPU_ADX;
2983   if (_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
2984     result |= CPU_BMI2;
2985   if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
2986     result |= CPU_SHA;
2987   if (_cpuid_info.std_cpuid1_ecx.bits.fma != 0)
2988     result |= CPU_FMA;
2989   if (_cpuid_info.sef_cpuid7_ebx.bits.clflushopt != 0)
2990     result |= CPU_FLUSHOPT;
2991   if (_cpuid_info.ext_cpuid1_edx.bits.rdtscp != 0)
2992     result |= CPU_RDTSCP;
2993   if (_cpuid_info.sef_cpuid7_ecx.bits.rdpid != 0)
2994     result |= CPU_RDPID;
2995 
2996   // AMD|Hygon features.
2997   if (is_amd_family()) {
2998     if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
2999         (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
3000       result |= CPU_3DNOW_PREFETCH;
3001     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
3002       result |= CPU_LZCNT;
3003     if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
3004       result |= CPU_SSE4A;
3005   }
3006 
3007   // Intel features.
3008   if (is_intel()) {
3009     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3010       result |= CPU_LZCNT;
3011     }
3012     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3013       result |= CPU_3DNOW_PREFETCH;
3014     }
3015     if (_cpuid_info.sef_cpuid7_ebx.bits.clwb != 0) {
3016       result |= CPU_CLWB;
3017     }
3018     if (_cpuid_info.sef_cpuid7_edx.bits.serialize != 0)
3019       result |= CPU_SERIALIZE;
3020   }
3021 
3022   // ZX features.
3023   if (is_zx()) {
3024     if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0) {
3025       result |= CPU_LZCNT;
3026     }
3027     if (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0) {
3028       result |= CPU_3DNOW_PREFETCH;
3029     }
3030   }
3031 
3032   // Protection key features.
3033   if (_cpuid_info.sef_cpuid7_ecx.bits.pku != 0) {
3034     result |= CPU_PKU;
3035   }
3036   if (_cpuid_info.sef_cpuid7_ecx.bits.ospke != 0) {
3037     result |= CPU_OSPKE;
3038   }
3039 
3040   // Control flow enforcement (CET) features.
3041   if (_cpuid_info.sef_cpuid7_ecx.bits.cet_ss != 0) {
3042     result |= CPU_CET_SS;
3043   }
3044   if (_cpuid_info.sef_cpuid7_edx.bits.cet_ibt != 0) {
3045     result |= CPU_CET_IBT;
3046   }
3047 
3048   // Composite features.
3049   if (supports_tscinv_bit() &&
3050       ((is_amd_family() && !is_amd_Barcelona()) ||
3051        is_intel_tsc_synched_at_init())) {
3052     result |= CPU_TSCINV;
3053   }
3054 
3055   return result;
3056 }
3057 
3058 bool VM_Version::os_supports_avx_vectors() {
3059   bool retVal = false;
3060   int nreg = 2 LP64_ONLY(+2);
3061   if (supports_evex()) {
3062     // Verify that OS save/restore all bits of EVEX registers
3063     // during signal processing.
3064     retVal = true;
3065     for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3066       if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3067         retVal = false;
3068         break;
3069       }
3070     }
3071   } else if (supports_avx()) {
3072     // Verify that OS save/restore all bits of AVX registers
3073     // during signal processing.
3074     retVal = true;
3075     for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3076       if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3077         retVal = false;
3078         break;
3079       }
3080     }
3081     // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3082     if (retVal == false) {
3083       // Verify that OS save/restore all bits of EVEX registers
3084       // during signal processing.
3085       retVal = true;
3086       for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3087         if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3088           retVal = false;
3089           break;
3090         }
3091       }
3092     }
3093   }
3094   return retVal;
3095 }
3096 
3097 uint VM_Version::cores_per_cpu() {
3098   uint result = 1;
3099   if (is_intel()) {
3100     bool supports_topology = supports_processor_topology();
3101     if (supports_topology) {
3102       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3103                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3104     }
3105     if (!supports_topology || result == 0) {
3106       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3107     }
3108   } else if (is_amd_family()) {
3109     result = (_cpuid_info.ext_cpuid8_ecx.bits.cores_per_cpu + 1);
3110   } else if (is_zx()) {
3111     bool supports_topology = supports_processor_topology();
3112     if (supports_topology) {
3113       result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3114                _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3115     }
3116     if (!supports_topology || result == 0) {
3117       result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3118     }
3119   }
3120   return result;
3121 }
3122 
3123 uint VM_Version::threads_per_core() {
3124   uint result = 1;
3125   if (is_intel() && supports_processor_topology()) {
3126     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3127   } else if (is_zx() && supports_processor_topology()) {
3128     result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3129   } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3130     if (cpu_family() >= 0x17) {
3131       result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3132     } else {
3133       result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3134                  cores_per_cpu();
3135     }
3136   }
3137   return (result == 0 ? 1 : result);
3138 }
3139 
3140 intx VM_Version::L1_line_size() {
3141   intx result = 0;
3142   if (is_intel()) {
3143     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3144   } else if (is_amd_family()) {
3145     result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3146   } else if (is_zx()) {
3147     result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3148   }
3149   if (result < 32) // not defined ?
3150     result = 32;   // 32 bytes by default on x86 and other x64
3151   return result;
3152 }
3153 
3154 bool VM_Version::is_intel_tsc_synched_at_init() {
3155   if (is_intel_family_core()) {
3156     uint32_t ext_model = extended_cpu_model();
3157     if (ext_model == CPU_MODEL_NEHALEM_EP     ||
3158         ext_model == CPU_MODEL_WESTMERE_EP    ||
3159         ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3160         ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3161       // <= 2-socket invariant tsc support. EX versions are usually used
3162       // in > 2-socket systems and likely don't synchronize tscs at
3163       // initialization.
3164       // Code that uses tsc values must be prepared for them to arbitrarily
3165       // jump forward or backward.
3166       return true;
3167     }
3168   }
3169   return false;
3170 }
3171 
3172 intx VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3173   // Hardware prefetching (distance/size in bytes):
3174   // Pentium 3 -  64 /  32
3175   // Pentium 4 - 256 / 128
3176   // Athlon    -  64 /  32 ????
3177   // Opteron   - 128 /  64 only when 2 sequential cache lines accessed
3178   // Core      - 128 /  64
3179   //
3180   // Software prefetching (distance in bytes / instruction with best score):
3181   // Pentium 3 - 128 / prefetchnta
3182   // Pentium 4 - 512 / prefetchnta
3183   // Athlon    - 128 / prefetchnta
3184   // Opteron   - 256 / prefetchnta
3185   // Core      - 256 / prefetchnta
3186   // It will be used only when AllocatePrefetchStyle > 0
3187 
3188   if (is_amd_family()) { // AMD | Hygon
3189     if (supports_sse2()) {
3190       return 256; // Opteron
3191     } else {
3192       return 128; // Athlon
3193     }
3194   } else { // Intel
3195     if (supports_sse3() && cpu_family() == 6) {
3196       if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3197         return 192;
3198       } else if (use_watermark_prefetch) { // watermark prefetching on Core
3199 #ifdef _LP64
3200         return 384;
3201 #else
3202         return 320;
3203 #endif
3204       }
3205     }
3206     if (supports_sse2()) {
3207       if (cpu_family() == 6) {
3208         return 256; // Pentium M, Core, Core2
3209       } else {
3210         return 512; // Pentium 4
3211       }
3212     } else {
3213       return 128; // Pentium 3 (and all other old CPUs)
3214     }
3215   }
3216 }
3217 
3218 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3219   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3220   switch (id) {
3221   case vmIntrinsics::_floatToFloat16:
3222   case vmIntrinsics::_float16ToFloat:
3223     if (!supports_float16()) {
3224       return false;
3225     }
3226     break;
3227   default:
3228     break;
3229   }
3230   return true;
3231 }
3232