1 /*
   2  * Copyright (c) 1997, 2022, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "jvm.h"
  27 #include "asm/macroAssembler.hpp"
  28 #include "asm/macroAssembler.inline.hpp"
  29 #include "code/codeBlob.hpp"
  30 #include "logging/log.hpp"
  31 #include "logging/logStream.hpp"
  32 #include "memory/resourceArea.hpp"
  33 #include "memory/universe.hpp"
  34 #include "runtime/globals_extension.hpp"
  35 #include "runtime/java.hpp"
  36 #include "runtime/os.hpp"
  37 #include "runtime/stubCodeGenerator.hpp"
  38 #include "runtime/vm_version.hpp"
  39 #include "utilities/powerOfTwo.hpp"
  40 #include "utilities/virtualizationSupport.hpp"
  41 
  42 #include OS_HEADER_INLINE(os)
  43 
  44 int VM_Version::_cpu;
  45 int VM_Version::_model;
  46 int VM_Version::_stepping;
  47 bool VM_Version::_has_intel_jcc_erratum;
  48 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
  49 
  50 #define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
  51 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
  52 #undef DECLARE_CPU_FEATURE_FLAG
  53 
  54 // Address of instruction which causes SEGV
  55 address VM_Version::_cpuinfo_segv_addr = 0;
  56 // Address of instruction after the one which causes SEGV
  57 address VM_Version::_cpuinfo_cont_addr = 0;
  58 
  59 static BufferBlob* stub_blob;
  60 static const int stub_size = 2000;
  61 
  62 extern "C" {
  63   typedef void (*get_cpu_info_stub_t)(void*);
  64   typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  65 }
  66 static get_cpu_info_stub_t get_cpu_info_stub = NULL;
  67 static detect_virt_stub_t detect_virt_stub = NULL;
  68 
  69 #ifdef _LP64
  70 
  71 bool VM_Version::supports_clflush() {
  72   // clflush should always be available on x86_64
  73   // if not we are in real trouble because we rely on it
  74   // to flush the code cache.
  75   // Unfortunately, Assembler::clflush is currently called as part
  76   // of generation of the code cache flush routine. This happens
  77   // under Universe::init before the processor features are set
  78   // up. Assembler::flush calls this routine to check that clflush
  79   // is allowed. So, we give the caller a free pass if Universe init
  80   // is still in progress.
  81   assert ((!Universe::is_fully_initialized() || (_features & CPU_FLUSH) != 0), "clflush should be available");
  82   return true;
  83 }
  84 #endif
  85 
  86 #define CPUID_STANDARD_FN   0x0
  87 #define CPUID_STANDARD_FN_1 0x1
  88 #define CPUID_STANDARD_FN_4 0x4
  89 #define CPUID_STANDARD_FN_B 0xb
  90 
  91 #define CPUID_EXTENDED_FN   0x80000000
  92 #define CPUID_EXTENDED_FN_1 0x80000001
  93 #define CPUID_EXTENDED_FN_2 0x80000002
  94 #define CPUID_EXTENDED_FN_3 0x80000003
  95 #define CPUID_EXTENDED_FN_4 0x80000004
  96 #define CPUID_EXTENDED_FN_7 0x80000007
  97 #define CPUID_EXTENDED_FN_8 0x80000008
  98 
  99 class VM_Version_StubGenerator: public StubCodeGenerator {
 100  public:
 101 
 102   VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
 103 
 104   address generate_get_cpu_info() {
 105     // Flags to test CPU type.
 106     const uint32_t HS_EFL_AC = 0x40000;
 107     const uint32_t HS_EFL_ID = 0x200000;
 108     // Values for when we don't have a CPUID instruction.
 109     const int      CPU_FAMILY_SHIFT = 8;
 110     const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
 111     const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
 112     bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
 113 
 114     Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4;
 115     Label sef_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7, ext_cpuid8, done, wrapup;
 116     Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
 117 
 118     StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
 119 #   define __ _masm->
 120 
 121     address start = __ pc();
 122 
 123     //
 124     // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
 125     //
 126     // LP64: rcx and rdx are first and second argument registers on windows
 127 
 128     __ push(rbp);
 129 #ifdef _LP64
 130     __ mov(rbp, c_rarg0); // cpuid_info address
 131 #else
 132     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 133 #endif
 134     __ push(rbx);
 135     __ push(rsi);
 136     __ pushf();          // preserve rbx, and flags
 137     __ pop(rax);
 138     __ push(rax);
 139     __ mov(rcx, rax);
 140     //
 141     // if we are unable to change the AC flag, we have a 386
 142     //
 143     __ xorl(rax, HS_EFL_AC);
 144     __ push(rax);
 145     __ popf();
 146     __ pushf();
 147     __ pop(rax);
 148     __ cmpptr(rax, rcx);
 149     __ jccb(Assembler::notEqual, detect_486);
 150 
 151     __ movl(rax, CPU_FAMILY_386);
 152     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 153     __ jmp(done);
 154 
 155     //
 156     // If we are unable to change the ID flag, we have a 486 which does
 157     // not support the "cpuid" instruction.
 158     //
 159     __ bind(detect_486);
 160     __ mov(rax, rcx);
 161     __ xorl(rax, HS_EFL_ID);
 162     __ push(rax);
 163     __ popf();
 164     __ pushf();
 165     __ pop(rax);
 166     __ cmpptr(rcx, rax);
 167     __ jccb(Assembler::notEqual, detect_586);
 168 
 169     __ bind(cpu486);
 170     __ movl(rax, CPU_FAMILY_486);
 171     __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
 172     __ jmp(done);
 173 
 174     //
 175     // At this point, we have a chip which supports the "cpuid" instruction
 176     //
 177     __ bind(detect_586);
 178     __ xorl(rax, rax);
 179     __ cpuid();
 180     __ orl(rax, rax);
 181     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 182                                         // value of at least 1, we give up and
 183                                         // assume a 486
 184     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 185     __ movl(Address(rsi, 0), rax);
 186     __ movl(Address(rsi, 4), rbx);
 187     __ movl(Address(rsi, 8), rcx);
 188     __ movl(Address(rsi,12), rdx);
 189 
 190     __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
 191     __ jccb(Assembler::belowEqual, std_cpuid4);
 192 
 193     //
 194     // cpuid(0xB) Processor Topology
 195     //
 196     __ movl(rax, 0xb);
 197     __ xorl(rcx, rcx);   // Threads level
 198     __ cpuid();
 199 
 200     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
 201     __ movl(Address(rsi, 0), rax);
 202     __ movl(Address(rsi, 4), rbx);
 203     __ movl(Address(rsi, 8), rcx);
 204     __ movl(Address(rsi,12), rdx);
 205 
 206     __ movl(rax, 0xb);
 207     __ movl(rcx, 1);     // Cores level
 208     __ cpuid();
 209     __ push(rax);
 210     __ andl(rax, 0x1f);  // Determine if valid topology level
 211     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 212     __ andl(rax, 0xffff);
 213     __ pop(rax);
 214     __ jccb(Assembler::equal, std_cpuid4);
 215 
 216     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
 217     __ movl(Address(rsi, 0), rax);
 218     __ movl(Address(rsi, 4), rbx);
 219     __ movl(Address(rsi, 8), rcx);
 220     __ movl(Address(rsi,12), rdx);
 221 
 222     __ movl(rax, 0xb);
 223     __ movl(rcx, 2);     // Packages level
 224     __ cpuid();
 225     __ push(rax);
 226     __ andl(rax, 0x1f);  // Determine if valid topology level
 227     __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
 228     __ andl(rax, 0xffff);
 229     __ pop(rax);
 230     __ jccb(Assembler::equal, std_cpuid4);
 231 
 232     __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
 233     __ movl(Address(rsi, 0), rax);
 234     __ movl(Address(rsi, 4), rbx);
 235     __ movl(Address(rsi, 8), rcx);
 236     __ movl(Address(rsi,12), rdx);
 237 
 238     //
 239     // cpuid(0x4) Deterministic cache params
 240     //
 241     __ bind(std_cpuid4);
 242     __ movl(rax, 4);
 243     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
 244     __ jccb(Assembler::greater, std_cpuid1);
 245 
 246     __ xorl(rcx, rcx);   // L1 cache
 247     __ cpuid();
 248     __ push(rax);
 249     __ andl(rax, 0x1f);  // Determine if valid cache parameters used
 250     __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
 251     __ pop(rax);
 252     __ jccb(Assembler::equal, std_cpuid1);
 253 
 254     __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
 255     __ movl(Address(rsi, 0), rax);
 256     __ movl(Address(rsi, 4), rbx);
 257     __ movl(Address(rsi, 8), rcx);
 258     __ movl(Address(rsi,12), rdx);
 259 
 260     //
 261     // Standard cpuid(0x1)
 262     //
 263     __ bind(std_cpuid1);
 264     __ movl(rax, 1);
 265     __ cpuid();
 266     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 267     __ movl(Address(rsi, 0), rax);
 268     __ movl(Address(rsi, 4), rbx);
 269     __ movl(Address(rsi, 8), rcx);
 270     __ movl(Address(rsi,12), rdx);
 271 
 272     //
 273     // Check if OS has enabled XGETBV instruction to access XCR0
 274     // (OSXSAVE feature flag) and CPU supports AVX
 275     //
 276     __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 277     __ cmpl(rcx, 0x18000000);
 278     __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
 279 
 280     //
 281     // XCR0, XFEATURE_ENABLED_MASK register
 282     //
 283     __ xorl(rcx, rcx);   // zero for XCR0 register
 284     __ xgetbv();
 285     __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
 286     __ movl(Address(rsi, 0), rax);
 287     __ movl(Address(rsi, 4), rdx);
 288 
 289     //
 290     // cpuid(0x7) Structured Extended Features
 291     //
 292     __ bind(sef_cpuid);
 293     __ movl(rax, 7);
 294     __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
 295     __ jccb(Assembler::greater, ext_cpuid);
 296 
 297     __ xorl(rcx, rcx);
 298     __ cpuid();
 299     __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 300     __ movl(Address(rsi, 0), rax);
 301     __ movl(Address(rsi, 4), rbx);
 302     __ movl(Address(rsi, 8), rcx);
 303     __ movl(Address(rsi, 12), rdx);
 304 
 305     //
 306     // Extended cpuid(0x80000000)
 307     //
 308     __ bind(ext_cpuid);
 309     __ movl(rax, 0x80000000);
 310     __ cpuid();
 311     __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
 312     __ jcc(Assembler::belowEqual, done);
 313     __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
 314     __ jcc(Assembler::belowEqual, ext_cpuid1);
 315     __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
 316     __ jccb(Assembler::belowEqual, ext_cpuid5);
 317     __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
 318     __ jccb(Assembler::belowEqual, ext_cpuid7);
 319     __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
 320     __ jccb(Assembler::belowEqual, ext_cpuid8);
 321     __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
 322     __ jccb(Assembler::below, ext_cpuid8);
 323     //
 324     // Extended cpuid(0x8000001E)
 325     //
 326     __ movl(rax, 0x8000001E);
 327     __ cpuid();
 328     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
 329     __ movl(Address(rsi, 0), rax);
 330     __ movl(Address(rsi, 4), rbx);
 331     __ movl(Address(rsi, 8), rcx);
 332     __ movl(Address(rsi,12), rdx);
 333 
 334     //
 335     // Extended cpuid(0x80000008)
 336     //
 337     __ bind(ext_cpuid8);
 338     __ movl(rax, 0x80000008);
 339     __ cpuid();
 340     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
 341     __ movl(Address(rsi, 0), rax);
 342     __ movl(Address(rsi, 4), rbx);
 343     __ movl(Address(rsi, 8), rcx);
 344     __ movl(Address(rsi,12), rdx);
 345 
 346     //
 347     // Extended cpuid(0x80000007)
 348     //
 349     __ bind(ext_cpuid7);
 350     __ movl(rax, 0x80000007);
 351     __ cpuid();
 352     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
 353     __ movl(Address(rsi, 0), rax);
 354     __ movl(Address(rsi, 4), rbx);
 355     __ movl(Address(rsi, 8), rcx);
 356     __ movl(Address(rsi,12), rdx);
 357 
 358     //
 359     // Extended cpuid(0x80000005)
 360     //
 361     __ bind(ext_cpuid5);
 362     __ movl(rax, 0x80000005);
 363     __ cpuid();
 364     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
 365     __ movl(Address(rsi, 0), rax);
 366     __ movl(Address(rsi, 4), rbx);
 367     __ movl(Address(rsi, 8), rcx);
 368     __ movl(Address(rsi,12), rdx);
 369 
 370     //
 371     // Extended cpuid(0x80000001)
 372     //
 373     __ bind(ext_cpuid1);
 374     __ movl(rax, 0x80000001);
 375     __ cpuid();
 376     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
 377     __ movl(Address(rsi, 0), rax);
 378     __ movl(Address(rsi, 4), rbx);
 379     __ movl(Address(rsi, 8), rcx);
 380     __ movl(Address(rsi,12), rdx);
 381 
 382     //
 383     // Check if OS has enabled XGETBV instruction to access XCR0
 384     // (OSXSAVE feature flag) and CPU supports AVX
 385     //
 386     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 387     __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
 388     __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
 389     __ cmpl(rcx, 0x18000000);
 390     __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
 391 
 392     __ movl(rax, 0x6);
 393     __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 394     __ cmpl(rax, 0x6);
 395     __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
 396 
 397     // we need to bridge farther than imm8, so we use this island as a thunk
 398     __ bind(done);
 399     __ jmp(wrapup);
 400 
 401     __ bind(start_simd_check);
 402     //
 403     // Some OSs have a bug when upper 128/256bits of YMM/ZMM
 404     // registers are not restored after a signal processing.
 405     // Generate SEGV here (reference through NULL)
 406     // and check upper YMM/ZMM bits after it.
 407     //
 408     intx saved_useavx = UseAVX;
 409     intx saved_usesse = UseSSE;
 410 
 411     // If UseAVX is uninitialized or is set by the user to include EVEX
 412     if (use_evex) {
 413       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 414       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 415       __ movl(rax, 0x10000);
 416       __ andl(rax, Address(rsi, 4)); // xcr0 bits sse | ymm
 417       __ cmpl(rax, 0x10000);
 418       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 419       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 420       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 421       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 422       __ movl(rax, 0xE0);
 423       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 424       __ cmpl(rax, 0xE0);
 425       __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
 426 
 427       if (FLAG_IS_DEFAULT(UseAVX)) {
 428         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 429         __ movl(rax, Address(rsi, 0));
 430         __ cmpl(rax, 0x50654);              // If it is Skylake
 431         __ jcc(Assembler::equal, legacy_setup);
 432       }
 433       // EVEX setup: run in lowest evex mode
 434       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 435       UseAVX = 3;
 436       UseSSE = 2;
 437 #ifdef _WINDOWS
 438       // xmm5-xmm15 are not preserved by caller on windows
 439       // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
 440       __ subptr(rsp, 64);
 441       __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
 442 #ifdef _LP64
 443       __ subptr(rsp, 64);
 444       __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
 445       __ subptr(rsp, 64);
 446       __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
 447 #endif // _LP64
 448 #endif // _WINDOWS
 449 
 450       // load value into all 64 bytes of zmm7 register
 451       __ movl(rcx, VM_Version::ymm_test_value());
 452       __ movdl(xmm0, rcx);
 453       __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
 454       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 455 #ifdef _LP64
 456       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
 457       __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
 458 #endif
 459       VM_Version::clean_cpuFeatures();
 460       __ jmp(save_restore_except);
 461     }
 462 
 463     __ bind(legacy_setup);
 464     // AVX setup
 465     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 466     UseAVX = 1;
 467     UseSSE = 2;
 468 #ifdef _WINDOWS
 469     __ subptr(rsp, 32);
 470     __ vmovdqu(Address(rsp, 0), xmm7);
 471 #ifdef _LP64
 472     __ subptr(rsp, 32);
 473     __ vmovdqu(Address(rsp, 0), xmm8);
 474     __ subptr(rsp, 32);
 475     __ vmovdqu(Address(rsp, 0), xmm15);
 476 #endif // _LP64
 477 #endif // _WINDOWS
 478 
 479     // load value into all 32 bytes of ymm7 register
 480     __ movl(rcx, VM_Version::ymm_test_value());
 481 
 482     __ movdl(xmm0, rcx);
 483     __ pshufd(xmm0, xmm0, 0x00);
 484     __ vinsertf128_high(xmm0, xmm0);
 485     __ vmovdqu(xmm7, xmm0);
 486 #ifdef _LP64
 487     __ vmovdqu(xmm8, xmm0);
 488     __ vmovdqu(xmm15, xmm0);
 489 #endif
 490     VM_Version::clean_cpuFeatures();
 491 
 492     __ bind(save_restore_except);
 493     __ xorl(rsi, rsi);
 494     VM_Version::set_cpuinfo_segv_addr(__ pc());
 495     // Generate SEGV
 496     __ movl(rax, Address(rsi, 0));
 497 
 498     VM_Version::set_cpuinfo_cont_addr(__ pc());
 499     // Returns here after signal. Save xmm0 to check it later.
 500 
 501     // If UseAVX is uninitialized or is set by the user to include EVEX
 502     if (use_evex) {
 503       // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
 504       __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
 505       __ movl(rax, 0x10000);
 506       __ andl(rax, Address(rsi, 4));
 507       __ cmpl(rax, 0x10000);
 508       __ jcc(Assembler::notEqual, legacy_save_restore);
 509       // check _cpuid_info.xem_xcr0_eax.bits.opmask
 510       // check _cpuid_info.xem_xcr0_eax.bits.zmm512
 511       // check _cpuid_info.xem_xcr0_eax.bits.zmm32
 512       __ movl(rax, 0xE0);
 513       __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
 514       __ cmpl(rax, 0xE0);
 515       __ jcc(Assembler::notEqual, legacy_save_restore);
 516 
 517       if (FLAG_IS_DEFAULT(UseAVX)) {
 518         __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 519         __ movl(rax, Address(rsi, 0));
 520         __ cmpl(rax, 0x50654);              // If it is Skylake
 521         __ jcc(Assembler::equal, legacy_save_restore);
 522       }
 523       // EVEX check: run in lowest evex mode
 524       VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
 525       UseAVX = 3;
 526       UseSSE = 2;
 527       __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
 528       __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
 529       __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
 530 #ifdef _LP64
 531       __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
 532       __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
 533 #endif
 534 
 535 #ifdef _WINDOWS
 536 #ifdef _LP64
 537       __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
 538       __ addptr(rsp, 64);
 539       __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
 540       __ addptr(rsp, 64);
 541 #endif // _LP64
 542       __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
 543       __ addptr(rsp, 64);
 544 #endif // _WINDOWS
 545       generate_vzeroupper(wrapup);
 546       VM_Version::clean_cpuFeatures();
 547       UseAVX = saved_useavx;
 548       UseSSE = saved_usesse;
 549       __ jmp(wrapup);
 550    }
 551 
 552     __ bind(legacy_save_restore);
 553     // AVX check
 554     VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
 555     UseAVX = 1;
 556     UseSSE = 2;
 557     __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
 558     __ vmovdqu(Address(rsi, 0), xmm0);
 559     __ vmovdqu(Address(rsi, 32), xmm7);
 560 #ifdef _LP64
 561     __ vmovdqu(Address(rsi, 64), xmm8);
 562     __ vmovdqu(Address(rsi, 96), xmm15);
 563 #endif
 564 
 565 #ifdef _WINDOWS
 566 #ifdef _LP64
 567     __ vmovdqu(xmm15, Address(rsp, 0));
 568     __ addptr(rsp, 32);
 569     __ vmovdqu(xmm8, Address(rsp, 0));
 570     __ addptr(rsp, 32);
 571 #endif // _LP64
 572     __ vmovdqu(xmm7, Address(rsp, 0));
 573     __ addptr(rsp, 32);
 574 #endif // _WINDOWS
 575     generate_vzeroupper(wrapup);
 576     VM_Version::clean_cpuFeatures();
 577     UseAVX = saved_useavx;
 578     UseSSE = saved_usesse;
 579 
 580     __ bind(wrapup);
 581     __ popf();
 582     __ pop(rsi);
 583     __ pop(rbx);
 584     __ pop(rbp);
 585     __ ret(0);
 586 
 587 #   undef __
 588 
 589     return start;
 590   };
 591   void generate_vzeroupper(Label& L_wrapup) {
 592 #   define __ _masm->
 593     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
 594     __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
 595     __ jcc(Assembler::notEqual, L_wrapup);
 596     __ movl(rcx, 0x0FFF0FF0);
 597     __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
 598     __ andl(rcx, Address(rsi, 0));
 599     __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
 600     __ jcc(Assembler::equal, L_wrapup);
 601     __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
 602     __ jcc(Assembler::equal, L_wrapup);
 603     // vzeroupper() will use a pre-computed instruction sequence that we
 604     // can't compute until after we've determined CPU capabilities. Use
 605     // uncached variant here directly to be able to bootstrap correctly
 606     __ vzeroupper_uncached();
 607 #   undef __
 608   }
 609   address generate_detect_virt() {
 610     StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
 611 #   define __ _masm->
 612 
 613     address start = __ pc();
 614 
 615     // Evacuate callee-saved registers
 616     __ push(rbp);
 617     __ push(rbx);
 618     __ push(rsi); // for Windows
 619 
 620 #ifdef _LP64
 621     __ mov(rax, c_rarg0); // CPUID leaf
 622     __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
 623 #else
 624     __ movptr(rax, Address(rsp, 16)); // CPUID leaf
 625     __ movptr(rsi, Address(rsp, 20)); // register array address
 626 #endif
 627 
 628     __ cpuid();
 629 
 630     // Store result to register array
 631     __ movl(Address(rsi,  0), rax);
 632     __ movl(Address(rsi,  4), rbx);
 633     __ movl(Address(rsi,  8), rcx);
 634     __ movl(Address(rsi, 12), rdx);
 635 
 636     // Epilogue
 637     __ pop(rsi);
 638     __ pop(rbx);
 639     __ pop(rbp);
 640     __ ret(0);
 641 
 642 #   undef __
 643 
 644     return start;
 645   };
 646 
 647 
 648   address generate_getCPUIDBrandString(void) {
 649     // Flags to test CPU type.
 650     const uint32_t HS_EFL_AC           = 0x40000;
 651     const uint32_t HS_EFL_ID           = 0x200000;
 652     // Values for when we don't have a CPUID instruction.
 653     const int      CPU_FAMILY_SHIFT = 8;
 654     const uint32_t CPU_FAMILY_386   = (3 << CPU_FAMILY_SHIFT);
 655     const uint32_t CPU_FAMILY_486   = (4 << CPU_FAMILY_SHIFT);
 656 
 657     Label detect_486, cpu486, detect_586, done, ext_cpuid;
 658 
 659     StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
 660 #   define __ _masm->
 661 
 662     address start = __ pc();
 663 
 664     //
 665     // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
 666     //
 667     // LP64: rcx and rdx are first and second argument registers on windows
 668 
 669     __ push(rbp);
 670 #ifdef _LP64
 671     __ mov(rbp, c_rarg0); // cpuid_info address
 672 #else
 673     __ movptr(rbp, Address(rsp, 8)); // cpuid_info address
 674 #endif
 675     __ push(rbx);
 676     __ push(rsi);
 677     __ pushf();          // preserve rbx, and flags
 678     __ pop(rax);
 679     __ push(rax);
 680     __ mov(rcx, rax);
 681     //
 682     // if we are unable to change the AC flag, we have a 386
 683     //
 684     __ xorl(rax, HS_EFL_AC);
 685     __ push(rax);
 686     __ popf();
 687     __ pushf();
 688     __ pop(rax);
 689     __ cmpptr(rax, rcx);
 690     __ jccb(Assembler::notEqual, detect_486);
 691 
 692     __ movl(rax, CPU_FAMILY_386);
 693     __ jmp(done);
 694 
 695     //
 696     // If we are unable to change the ID flag, we have a 486 which does
 697     // not support the "cpuid" instruction.
 698     //
 699     __ bind(detect_486);
 700     __ mov(rax, rcx);
 701     __ xorl(rax, HS_EFL_ID);
 702     __ push(rax);
 703     __ popf();
 704     __ pushf();
 705     __ pop(rax);
 706     __ cmpptr(rcx, rax);
 707     __ jccb(Assembler::notEqual, detect_586);
 708 
 709     __ bind(cpu486);
 710     __ movl(rax, CPU_FAMILY_486);
 711     __ jmp(done);
 712 
 713     //
 714     // At this point, we have a chip which supports the "cpuid" instruction
 715     //
 716     __ bind(detect_586);
 717     __ xorl(rax, rax);
 718     __ cpuid();
 719     __ orl(rax, rax);
 720     __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
 721                                         // value of at least 1, we give up and
 722                                         // assume a 486
 723 
 724     //
 725     // Extended cpuid(0x80000000) for processor brand string detection
 726     //
 727     __ bind(ext_cpuid);
 728     __ movl(rax, CPUID_EXTENDED_FN);
 729     __ cpuid();
 730     __ cmpl(rax, CPUID_EXTENDED_FN_4);
 731     __ jcc(Assembler::below, done);
 732 
 733     //
 734     // Extended cpuid(0x80000002)  // first 16 bytes in brand string
 735     //
 736     __ movl(rax, CPUID_EXTENDED_FN_2);
 737     __ cpuid();
 738     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
 739     __ movl(Address(rsi, 0), rax);
 740     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
 741     __ movl(Address(rsi, 0), rbx);
 742     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
 743     __ movl(Address(rsi, 0), rcx);
 744     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
 745     __ movl(Address(rsi,0), rdx);
 746 
 747     //
 748     // Extended cpuid(0x80000003) // next 16 bytes in brand string
 749     //
 750     __ movl(rax, CPUID_EXTENDED_FN_3);
 751     __ cpuid();
 752     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
 753     __ movl(Address(rsi, 0), rax);
 754     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
 755     __ movl(Address(rsi, 0), rbx);
 756     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
 757     __ movl(Address(rsi, 0), rcx);
 758     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
 759     __ movl(Address(rsi,0), rdx);
 760 
 761     //
 762     // Extended cpuid(0x80000004) // last 16 bytes in brand string
 763     //
 764     __ movl(rax, CPUID_EXTENDED_FN_4);
 765     __ cpuid();
 766     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
 767     __ movl(Address(rsi, 0), rax);
 768     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
 769     __ movl(Address(rsi, 0), rbx);
 770     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
 771     __ movl(Address(rsi, 0), rcx);
 772     __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
 773     __ movl(Address(rsi,0), rdx);
 774 
 775     //
 776     // return
 777     //
 778     __ bind(done);
 779     __ popf();
 780     __ pop(rsi);
 781     __ pop(rbx);
 782     __ pop(rbp);
 783     __ ret(0);
 784 
 785 #   undef __
 786 
 787     return start;
 788   };
 789 };
 790 
 791 void VM_Version::get_processor_features() {
 792 
 793   _cpu = 4; // 486 by default
 794   _model = 0;
 795   _stepping = 0;
 796   _features = 0;
 797   _logical_processors_per_package = 1;
 798   // i486 internal cache is both I&D and has a 16-byte line size
 799   _L1_data_cache_line_size = 16;
 800 
 801   // Get raw processor info
 802 
 803   get_cpu_info_stub(&_cpuid_info);
 804 
 805   assert_is_initialized();
 806   _cpu = extended_cpu_family();
 807   _model = extended_cpu_model();
 808   _stepping = cpu_stepping();
 809 
 810   if (cpu_family() > 4) { // it supports CPUID
 811     _features = feature_flags();
 812     // Logical processors are only available on P4s and above,
 813     // and only if hyperthreading is available.
 814     _logical_processors_per_package = logical_processor_count();
 815     _L1_data_cache_line_size = L1_line_size();
 816   }
 817 
 818   _supports_cx8 = supports_cmpxchg8();
 819   // xchg and xadd instructions
 820   _supports_atomic_getset4 = true;
 821   _supports_atomic_getadd4 = true;
 822   LP64_ONLY(_supports_atomic_getset8 = true);
 823   LP64_ONLY(_supports_atomic_getadd8 = true);
 824 
 825 #ifdef _LP64
 826   // OS should support SSE for x64 and hardware should support at least SSE2.
 827   if (!VM_Version::supports_sse2()) {
 828     vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
 829   }
 830   // in 64 bit the use of SSE2 is the minimum
 831   if (UseSSE < 2) UseSSE = 2;
 832 #endif
 833 
 834 #ifdef AMD64
 835   // flush_icache_stub have to be generated first.
 836   // That is why Icache line size is hard coded in ICache class,
 837   // see icache_x86.hpp. It is also the reason why we can't use
 838   // clflush instruction in 32-bit VM since it could be running
 839   // on CPU which does not support it.
 840   //
 841   // The only thing we can do is to verify that flushed
 842   // ICache::line_size has correct value.
 843   guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
 844   // clflush_size is size in quadwords (8 bytes).
 845   guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
 846 #endif
 847 
 848 #ifdef _LP64
 849   // assigning this field effectively enables Unsafe.writebackMemory()
 850   // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
 851   // that is only implemented on x86_64 and only if the OS plays ball
 852   if (os::supports_map_sync()) {
 853     // publish data cache line flush size to generic field, otherwise
 854     // let if default to zero thereby disabling writeback
 855     _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
 856   }
 857 #endif
 858   // If the OS doesn't support SSE, we can't use this feature even if the HW does
 859   if (!os::supports_sse())
 860     _features &= ~(CPU_SSE|CPU_SSE2|CPU_SSE3|CPU_SSSE3|CPU_SSE4A|CPU_SSE4_1|CPU_SSE4_2);
 861 
 862   if (UseSSE < 4) {
 863     _features &= ~CPU_SSE4_1;
 864     _features &= ~CPU_SSE4_2;
 865   }
 866 
 867   if (UseSSE < 3) {
 868     _features &= ~CPU_SSE3;
 869     _features &= ~CPU_SSSE3;
 870     _features &= ~CPU_SSE4A;
 871   }
 872 
 873   if (UseSSE < 2)
 874     _features &= ~CPU_SSE2;
 875 
 876   if (UseSSE < 1)
 877     _features &= ~CPU_SSE;
 878 
 879   //since AVX instructions is slower than SSE in some ZX cpus, force USEAVX=0.
 880   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
 881     UseAVX = 0;
 882   }
 883 
 884   // first try initial setting and detect what we can support
 885   int use_avx_limit = 0;
 886   if (UseAVX > 0) {
 887     if (UseAVX > 2 && supports_evex()) {
 888       use_avx_limit = 3;
 889     } else if (UseAVX > 1 && supports_avx2()) {
 890       use_avx_limit = 2;
 891     } else if (UseAVX > 0 && supports_avx()) {
 892       use_avx_limit = 1;
 893     } else {
 894       use_avx_limit = 0;
 895     }
 896   }
 897   if (FLAG_IS_DEFAULT(UseAVX)) {
 898     // Don't use AVX-512 on older Skylakes unless explicitly requested.
 899     if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
 900       FLAG_SET_DEFAULT(UseAVX, 2);
 901     } else {
 902       FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 903     }
 904   }
 905   if (UseAVX > use_avx_limit) {
 906     warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", (int) UseAVX, use_avx_limit);
 907     FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
 908   } else if (UseAVX < 0) {
 909     warning("UseAVX=%d is not valid, setting it to UseAVX=0", (int) UseAVX);
 910     FLAG_SET_DEFAULT(UseAVX, 0);
 911   }
 912 
 913   if (UseAVX < 3) {
 914     _features &= ~CPU_AVX512F;
 915     _features &= ~CPU_AVX512DQ;
 916     _features &= ~CPU_AVX512CD;
 917     _features &= ~CPU_AVX512BW;
 918     _features &= ~CPU_AVX512VL;
 919     _features &= ~CPU_AVX512_VPOPCNTDQ;
 920     _features &= ~CPU_AVX512_VPCLMULQDQ;
 921     _features &= ~CPU_AVX512_VAES;
 922     _features &= ~CPU_AVX512_VNNI;
 923     _features &= ~CPU_AVX512_VBMI;
 924     _features &= ~CPU_AVX512_VBMI2;
 925     _features &= ~CPU_AVX512_BITALG;
 926   }
 927 
 928   if (UseAVX < 2)
 929     _features &= ~CPU_AVX2;
 930 
 931   if (UseAVX < 1) {
 932     _features &= ~CPU_AVX;
 933     _features &= ~CPU_VZEROUPPER;
 934   }
 935 
 936   if (logical_processors_per_package() == 1) {
 937     // HT processor could be installed on a system which doesn't support HT.
 938     _features &= ~CPU_HT;
 939   }
 940 
 941   if (is_intel()) { // Intel cpus specific settings
 942     if (is_knights_family()) {
 943       _features &= ~CPU_VZEROUPPER;
 944       _features &= ~CPU_AVX512BW;
 945       _features &= ~CPU_AVX512VL;
 946       _features &= ~CPU_AVX512DQ;
 947       _features &= ~CPU_AVX512_VNNI;
 948       _features &= ~CPU_AVX512_VAES;
 949       _features &= ~CPU_AVX512_VPOPCNTDQ;
 950       _features &= ~CPU_AVX512_VPCLMULQDQ;
 951       _features &= ~CPU_AVX512_VBMI;
 952       _features &= ~CPU_AVX512_VBMI2;
 953       _features &= ~CPU_CLWB;
 954       _features &= ~CPU_FLUSHOPT;
 955       _features &= ~CPU_GFNI;
 956       _features &= ~CPU_AVX512_BITALG;
 957     }
 958   }
 959 
 960   if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
 961     _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
 962   } else {
 963     _has_intel_jcc_erratum = IntelJccErratumMitigation;
 964   }
 965 
 966   char buf[512];
 967   int res = jio_snprintf(
 968               buf, sizeof(buf),
 969               "(%u cores per cpu, %u threads per core) family %d model %d stepping %d microcode 0x%x",
 970               cores_per_cpu(), threads_per_core(),
 971               cpu_family(), _model, _stepping, os::cpu_microcode_revision());
 972   assert(res > 0, "not enough temporary space allocated");
 973   insert_features_names(buf + res, sizeof(buf) - res, _features_names);
 974 
 975   _features_string = os::strdup(buf);
 976 
 977   // UseSSE is set to the smaller of what hardware supports and what
 978   // the command line requires.  I.e., you cannot set UseSSE to 2 on
 979   // older Pentiums which do not support it.
 980   int use_sse_limit = 0;
 981   if (UseSSE > 0) {
 982     if (UseSSE > 3 && supports_sse4_1()) {
 983       use_sse_limit = 4;
 984     } else if (UseSSE > 2 && supports_sse3()) {
 985       use_sse_limit = 3;
 986     } else if (UseSSE > 1 && supports_sse2()) {
 987       use_sse_limit = 2;
 988     } else if (UseSSE > 0 && supports_sse()) {
 989       use_sse_limit = 1;
 990     } else {
 991       use_sse_limit = 0;
 992     }
 993   }
 994   if (FLAG_IS_DEFAULT(UseSSE)) {
 995     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 996   } else if (UseSSE > use_sse_limit) {
 997     warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", (int) UseSSE, use_sse_limit);
 998     FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
 999   } else if (UseSSE < 0) {
1000     warning("UseSSE=%d is not valid, setting it to UseSSE=0", (int) UseSSE);
1001     FLAG_SET_DEFAULT(UseSSE, 0);
1002   }
1003 
1004   // Use AES instructions if available.
1005   if (supports_aes()) {
1006     if (FLAG_IS_DEFAULT(UseAES)) {
1007       FLAG_SET_DEFAULT(UseAES, true);
1008     }
1009     if (!UseAES) {
1010       if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1011         warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1012       }
1013       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1014     } else {
1015       if (UseSSE > 2) {
1016         if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1017           FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1018         }
1019       } else {
1020         // The AES intrinsic stubs require AES instruction support (of course)
1021         // but also require sse3 mode or higher for instructions it use.
1022         if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1023           warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1024         }
1025         FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1026       }
1027 
1028       // --AES-CTR begins--
1029       if (!UseAESIntrinsics) {
1030         if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1031           warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1032           FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1033         }
1034       } else {
1035         if (supports_sse4_1()) {
1036           if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1037             FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1038           }
1039         } else {
1040            // The AES-CTR intrinsic stubs require AES instruction support (of course)
1041            // but also require sse4.1 mode or higher for instructions it use.
1042           if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1043              warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1044            }
1045            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1046         }
1047       }
1048       // --AES-CTR ends--
1049     }
1050   } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1051     if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1052       warning("AES instructions are not available on this CPU");
1053       FLAG_SET_DEFAULT(UseAES, false);
1054     }
1055     if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1056       warning("AES intrinsics are not available on this CPU");
1057       FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1058     }
1059     if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1060       warning("AES-CTR intrinsics are not available on this CPU");
1061       FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1062     }
1063   }
1064 
1065   // Use CLMUL instructions if available.
1066   if (supports_clmul()) {
1067     if (FLAG_IS_DEFAULT(UseCLMUL)) {
1068       UseCLMUL = true;
1069     }
1070   } else if (UseCLMUL) {
1071     if (!FLAG_IS_DEFAULT(UseCLMUL))
1072       warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1073     FLAG_SET_DEFAULT(UseCLMUL, false);
1074   }
1075 
1076   if (UseCLMUL && (UseSSE > 2)) {
1077     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1078       UseCRC32Intrinsics = true;
1079     }
1080   } else if (UseCRC32Intrinsics) {
1081     if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1082       warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1083     FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1084   }
1085 
1086 #ifdef _LP64
1087   if (supports_avx2()) {
1088     if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1089       UseAdler32Intrinsics = true;
1090     }
1091   } else if (UseAdler32Intrinsics) {
1092     if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1093       warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1094     }
1095     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1096   }
1097 #else
1098   if (UseAdler32Intrinsics) {
1099     warning("Adler32Intrinsics not available on this CPU.");
1100     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1101   }
1102 #endif
1103 
1104   if (supports_sse4_2() && supports_clmul()) {
1105     if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1106       UseCRC32CIntrinsics = true;
1107     }
1108   } else if (UseCRC32CIntrinsics) {
1109     if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1110       warning("CRC32C intrinsics are not available on this CPU");
1111     }
1112     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1113   }
1114 
1115   // GHASH/GCM intrinsics
1116   if (UseCLMUL && (UseSSE > 2)) {
1117     if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1118       UseGHASHIntrinsics = true;
1119     }
1120   } else if (UseGHASHIntrinsics) {
1121     if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1122       warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1123     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1124   }
1125 
1126   // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1127   if ((UseAVX > 2) && supports_avx512vl() && supports_avx512bw()) {
1128     if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1129       UseBASE64Intrinsics = true;
1130     }
1131   } else if (UseBASE64Intrinsics) {
1132      if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1133       warning("Base64 intrinsic requires EVEX instructions on this CPU");
1134     FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1135   }
1136 
1137   if (supports_fma() && UseSSE >= 2) { // Check UseSSE since FMA code uses SSE instructions
1138     if (FLAG_IS_DEFAULT(UseFMA)) {
1139       UseFMA = true;
1140     }
1141   } else if (UseFMA) {
1142     warning("FMA instructions are not available on this CPU");
1143     FLAG_SET_DEFAULT(UseFMA, false);
1144   }
1145 
1146   if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1147     UseMD5Intrinsics = true;
1148   }
1149 
1150   if (supports_sha() LP64_ONLY(|| supports_avx2() && supports_bmi2())) {
1151     if (FLAG_IS_DEFAULT(UseSHA)) {
1152       UseSHA = true;
1153     }
1154   } else if (UseSHA) {
1155     warning("SHA instructions are not available on this CPU");
1156     FLAG_SET_DEFAULT(UseSHA, false);
1157   }
1158 
1159   if (supports_sha() && supports_sse4_1() && UseSHA) {
1160     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1161       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1162     }
1163   } else if (UseSHA1Intrinsics) {
1164     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1165     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1166   }
1167 
1168   if (supports_sse4_1() && UseSHA) {
1169     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1170       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1171     }
1172   } else if (UseSHA256Intrinsics) {
1173     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1174     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1175   }
1176 
1177 #ifdef _LP64
1178   // These are only supported on 64-bit
1179   if (UseSHA && supports_avx2() && supports_bmi2()) {
1180     if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1181       FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1182     }
1183   } else
1184 #endif
1185   if (UseSHA512Intrinsics) {
1186     warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1187     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1188   }
1189 
1190   if (UseSHA3Intrinsics) {
1191     warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1192     FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1193   }
1194 
1195   if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1196     FLAG_SET_DEFAULT(UseSHA, false);
1197   }
1198 
1199   if (!supports_rtm() && UseRTMLocking) {
1200     vm_exit_during_initialization("RTM instructions are not available on this CPU");
1201   }
1202 
1203 #if INCLUDE_RTM_OPT
1204   if (UseRTMLocking) {
1205     if (!CompilerConfig::is_c2_enabled()) {
1206       // Only C2 does RTM locking optimization.
1207       vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1208     }
1209     if (is_intel_family_core()) {
1210       if ((_model == CPU_MODEL_HASWELL_E3) ||
1211           (_model == CPU_MODEL_HASWELL_E7 && _stepping < 3) ||
1212           (_model == CPU_MODEL_BROADWELL  && _stepping < 4)) {
1213         // currently a collision between SKL and HSW_E3
1214         if (!UnlockExperimentalVMOptions && UseAVX < 3) {
1215           vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this "
1216                                         "platform. It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
1217         } else {
1218           warning("UseRTMLocking is only available as experimental option on this platform.");
1219         }
1220       }
1221     }
1222     if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
1223       // RTM locking should be used only for applications with
1224       // high lock contention. For now we do not use it by default.
1225       vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
1226     }
1227   } else { // !UseRTMLocking
1228     if (UseRTMForStackLocks) {
1229       if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
1230         warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
1231       }
1232       FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
1233     }
1234     if (UseRTMDeopt) {
1235       FLAG_SET_DEFAULT(UseRTMDeopt, false);
1236     }
1237     if (PrintPreciseRTMLockingStatistics) {
1238       FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
1239     }
1240   }
1241 #else
1242   if (UseRTMLocking) {
1243     // Only C2 does RTM locking optimization.
1244     vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
1245   }
1246 #endif
1247 
1248 #ifdef COMPILER2
1249   if (UseFPUForSpilling) {
1250     if (UseSSE < 2) {
1251       // Only supported with SSE2+
1252       FLAG_SET_DEFAULT(UseFPUForSpilling, false);
1253     }
1254   }
1255 #endif
1256 
1257 #if COMPILER2_OR_JVMCI
1258   int max_vector_size = 0;
1259   if (UseSSE < 2) {
1260     // Vectors (in XMM) are only supported with SSE2+
1261     // SSE is always 2 on x64.
1262     max_vector_size = 0;
1263   } else if (UseAVX == 0 || !os_supports_avx_vectors()) {
1264     // 16 byte vectors (in XMM) are supported with SSE2+
1265     max_vector_size = 16;
1266   } else if (UseAVX == 1 || UseAVX == 2) {
1267     // 32 bytes vectors (in YMM) are only supported with AVX+
1268     max_vector_size = 32;
1269   } else if (UseAVX > 2) {
1270     // 64 bytes vectors (in ZMM) are only supported with AVX 3
1271     max_vector_size = 64;
1272   }
1273 
1274 #ifdef _LP64
1275   int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1276 #else
1277   int min_vector_size = 0;
1278 #endif
1279 
1280   if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1281     if (MaxVectorSize < min_vector_size) {
1282       warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1283       FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1284     }
1285     if (MaxVectorSize > max_vector_size) {
1286       warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1287       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1288     }
1289     if (!is_power_of_2(MaxVectorSize)) {
1290       warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1291       FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1292     }
1293   } else {
1294     // If default, use highest supported configuration
1295     FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1296   }
1297 
1298 #if defined(COMPILER2) && defined(ASSERT)
1299   if (MaxVectorSize > 0) {
1300     if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1301       tty->print_cr("State of YMM registers after signal handle:");
1302       int nreg = 2 LP64_ONLY(+2);
1303       const char* ymm_name[4] = {"0", "7", "8", "15"};
1304       for (int i = 0; i < nreg; i++) {
1305         tty->print("YMM%s:", ymm_name[i]);
1306         for (int j = 7; j >=0; j--) {
1307           tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1308         }
1309         tty->cr();
1310       }
1311     }
1312   }
1313 #endif // COMPILER2 && ASSERT
1314 
1315 #ifdef _LP64
1316   if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1317     UseMultiplyToLenIntrinsic = true;
1318   }
1319   if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1320     UseSquareToLenIntrinsic = true;
1321   }
1322   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1323     UseMulAddIntrinsic = true;
1324   }
1325   if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1326     UseMontgomeryMultiplyIntrinsic = true;
1327   }
1328   if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1329     UseMontgomerySquareIntrinsic = true;
1330   }
1331 #else
1332   if (UseMultiplyToLenIntrinsic) {
1333     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1334       warning("multiplyToLen intrinsic is not available in 32-bit VM");
1335     }
1336     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
1337   }
1338   if (UseMontgomeryMultiplyIntrinsic) {
1339     if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1340       warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
1341     }
1342     FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
1343   }
1344   if (UseMontgomerySquareIntrinsic) {
1345     if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1346       warning("montgomerySquare intrinsic is not available in 32-bit VM");
1347     }
1348     FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
1349   }
1350   if (UseSquareToLenIntrinsic) {
1351     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1352       warning("squareToLen intrinsic is not available in 32-bit VM");
1353     }
1354     FLAG_SET_DEFAULT(UseSquareToLenIntrinsic, false);
1355   }
1356   if (UseMulAddIntrinsic) {
1357     if (!FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1358       warning("mulAdd intrinsic is not available in 32-bit VM");
1359     }
1360     FLAG_SET_DEFAULT(UseMulAddIntrinsic, false);
1361   }
1362 #endif // _LP64
1363 #endif // COMPILER2_OR_JVMCI
1364 
1365   // On new cpus instructions which update whole XMM register should be used
1366   // to prevent partial register stall due to dependencies on high half.
1367   //
1368   // UseXmmLoadAndClearUpper == true  --> movsd(xmm, mem)
1369   // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1370   // UseXmmRegToRegMoveAll == true  --> movaps(xmm, xmm), movapd(xmm, xmm).
1371   // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm),  movsd(xmm, xmm).
1372 
1373 
1374   if (is_zx()) { // ZX cpus specific settings
1375     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1376       UseStoreImmI16 = false; // don't use it on ZX cpus
1377     }
1378     if ((cpu_family() == 6) || (cpu_family() == 7)) {
1379       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1380         // Use it on all ZX cpus
1381         UseAddressNop = true;
1382       }
1383     }
1384     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1385       UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1386     }
1387     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1388       if (supports_sse3()) {
1389         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1390       } else {
1391         UseXmmRegToRegMoveAll = false;
1392       }
1393     }
1394     if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1395 #ifdef COMPILER2
1396       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1397         // For new ZX cpus do the next optimization:
1398         // don't align the beginning of a loop if there are enough instructions
1399         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1400         // in current fetch line (OptoLoopAlignment) or the padding
1401         // is big (> MaxLoopPad).
1402         // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1403         // generated NOP instructions. 11 is the largest size of one
1404         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1405         MaxLoopPad = 11;
1406       }
1407 #endif // COMPILER2
1408       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1409         UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1410       }
1411       if (supports_sse4_2()) { // new ZX cpus
1412         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1413           UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1414         }
1415       }
1416       if (supports_sse4_2()) {
1417         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1418           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1419         }
1420       } else {
1421         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1422           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1423         }
1424         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1425       }
1426     }
1427 
1428     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1429       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1430     }
1431   }
1432 
1433   if (is_amd_family()) { // AMD cpus specific settings
1434     if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1435       // Use it on new AMD cpus starting from Opteron.
1436       UseAddressNop = true;
1437     }
1438     if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1439       // Use it on new AMD cpus starting from Opteron.
1440       UseNewLongLShift = true;
1441     }
1442     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1443       if (supports_sse4a()) {
1444         UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1445       } else {
1446         UseXmmLoadAndClearUpper = false;
1447       }
1448     }
1449     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1450       if (supports_sse4a()) {
1451         UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1452       } else {
1453         UseXmmRegToRegMoveAll = false;
1454       }
1455     }
1456     if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1457       if (supports_sse4a()) {
1458         UseXmmI2F = true;
1459       } else {
1460         UseXmmI2F = false;
1461       }
1462     }
1463     if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1464       if (supports_sse4a()) {
1465         UseXmmI2D = true;
1466       } else {
1467         UseXmmI2D = false;
1468       }
1469     }
1470     if (supports_sse4_2()) {
1471       if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1472         FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1473       }
1474     } else {
1475       if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1476         warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1477       }
1478       FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1479     }
1480 
1481     // some defaults for AMD family 15h
1482     if (cpu_family() == 0x15) {
1483       // On family 15h processors default is no sw prefetch
1484       if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1485         FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1486       }
1487       // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1488       if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1489         FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1490       }
1491       // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1492       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1493         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1494       }
1495       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1496         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1497       }
1498     }
1499 
1500 #ifdef COMPILER2
1501     if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1502       // Limit vectors size to 16 bytes on AMD cpus < 17h.
1503       FLAG_SET_DEFAULT(MaxVectorSize, 16);
1504     }
1505 #endif // COMPILER2
1506 
1507     // Some defaults for AMD family >= 17h && Hygon family 18h
1508     if (cpu_family() >= 0x17) {
1509       // On family >=17h processors use XMM and UnalignedLoadStores
1510       // for Array Copy
1511       if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1512         FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1513       }
1514       if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1515         FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1516       }
1517 #ifdef COMPILER2
1518       if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1519         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1520       }
1521 #endif
1522     }
1523   }
1524 
1525   if (is_intel()) { // Intel cpus specific settings
1526     if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1527       UseStoreImmI16 = false; // don't use it on Intel cpus
1528     }
1529     if (cpu_family() == 6 || cpu_family() == 15) {
1530       if (FLAG_IS_DEFAULT(UseAddressNop)) {
1531         // Use it on all Intel cpus starting from PentiumPro
1532         UseAddressNop = true;
1533       }
1534     }
1535     if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1536       UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1537     }
1538     if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1539       if (supports_sse3()) {
1540         UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1541       } else {
1542         UseXmmRegToRegMoveAll = false;
1543       }
1544     }
1545     if (cpu_family() == 6 && supports_sse3()) { // New Intel cpus
1546 #ifdef COMPILER2
1547       if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1548         // For new Intel cpus do the next optimization:
1549         // don't align the beginning of a loop if there are enough instructions
1550         // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1551         // in current fetch line (OptoLoopAlignment) or the padding
1552         // is big (> MaxLoopPad).
1553         // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1554         // generated NOP instructions. 11 is the largest size of one
1555         // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1556         MaxLoopPad = 11;
1557       }
1558 #endif // COMPILER2
1559 
1560       if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1561         UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1562       }
1563       if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1564         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1565           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1566         }
1567       }
1568       if (supports_sse4_2()) {
1569         if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1570           FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1571         }
1572       } else {
1573         if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1574           warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1575         }
1576         FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1577       }
1578     }
1579     if (is_atom_family() || is_knights_family()) {
1580 #ifdef COMPILER2
1581       if (FLAG_IS_DEFAULT(OptoScheduling)) {
1582         OptoScheduling = true;
1583       }
1584 #endif
1585       if (supports_sse4_2()) { // Silvermont
1586         if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1587           UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1588         }
1589       }
1590       if (FLAG_IS_DEFAULT(UseIncDec)) {
1591         FLAG_SET_DEFAULT(UseIncDec, false);
1592       }
1593     }
1594     if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1595       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1596     }
1597 #ifdef COMPILER2
1598     if (UseAVX > 2) {
1599       if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1600           (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1601            ArrayOperationPartialInlineSize != 0 &&
1602            ArrayOperationPartialInlineSize != 16 &&
1603            ArrayOperationPartialInlineSize != 32 &&
1604            ArrayOperationPartialInlineSize != 64)) {
1605         int inline_size = 0;
1606         if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1607           inline_size = 64;
1608         } else if (MaxVectorSize >= 32) {
1609           inline_size = 32;
1610         } else if (MaxVectorSize >= 16) {
1611           inline_size = 16;
1612         }
1613         if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1614           warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1615         }
1616         ArrayOperationPartialInlineSize = inline_size;
1617       }
1618 
1619       if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1620         ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1621         if (ArrayOperationPartialInlineSize) {
1622           warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize" INTX_FORMAT ")", MaxVectorSize);
1623         } else {
1624           warning("Setting ArrayOperationPartialInlineSize as " INTX_FORMAT, ArrayOperationPartialInlineSize);
1625         }
1626       }
1627     }
1628 #endif
1629   }
1630 
1631 #ifdef COMPILER2
1632   if (FLAG_IS_DEFAULT(OptimizeFill)) {
1633     if (MaxVectorSize < 32 || !VM_Version::supports_avx512vlbw()) {
1634       OptimizeFill = false;
1635     }
1636   }
1637 #endif
1638 
1639 #ifdef _LP64
1640   if (UseSSE42Intrinsics) {
1641     if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1642       UseVectorizedMismatchIntrinsic = true;
1643     }
1644   } else if (UseVectorizedMismatchIntrinsic) {
1645     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1646       warning("vectorizedMismatch intrinsics are not available on this CPU");
1647     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1648   }
1649 #else
1650   if (UseVectorizedMismatchIntrinsic) {
1651     if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1652       warning("vectorizedMismatch intrinsic is not available in 32-bit VM");
1653     }
1654     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1655   }
1656 #endif // _LP64
1657 
1658   // Use count leading zeros count instruction if available.
1659   if (supports_lzcnt()) {
1660     if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1661       UseCountLeadingZerosInstruction = true;
1662     }
1663    } else if (UseCountLeadingZerosInstruction) {
1664     warning("lzcnt instruction is not available on this CPU");
1665     FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1666   }
1667 
1668   // Use count trailing zeros instruction if available
1669   if (supports_bmi1()) {
1670     // tzcnt does not require VEX prefix
1671     if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1672       if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1673         // Don't use tzcnt if BMI1 is switched off on command line.
1674         UseCountTrailingZerosInstruction = false;
1675       } else {
1676         UseCountTrailingZerosInstruction = true;
1677       }
1678     }
1679   } else if (UseCountTrailingZerosInstruction) {
1680     warning("tzcnt instruction is not available on this CPU");
1681     FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1682   }
1683 
1684   // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1685   // VEX prefix is generated only when AVX > 0.
1686   if (supports_bmi1() && supports_avx()) {
1687     if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1688       UseBMI1Instructions = true;
1689     }
1690   } else if (UseBMI1Instructions) {
1691     warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1692     FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1693   }
1694 
1695   if (supports_bmi2() && supports_avx()) {
1696     if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1697       UseBMI2Instructions = true;
1698     }
1699   } else if (UseBMI2Instructions) {
1700     warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1701     FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1702   }
1703 
1704   // Use population count instruction if available.
1705   if (supports_popcnt()) {
1706     if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1707       UsePopCountInstruction = true;
1708     }
1709   } else if (UsePopCountInstruction) {
1710     warning("POPCNT instruction is not available on this CPU");
1711     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1712   }
1713 
1714   // Use fast-string operations if available.
1715   if (supports_erms()) {
1716     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1717       UseFastStosb = true;
1718     }
1719   } else if (UseFastStosb) {
1720     warning("fast-string operations are not available on this CPU");
1721     FLAG_SET_DEFAULT(UseFastStosb, false);
1722   }
1723 
1724   // For AMD Processors use XMM/YMM MOVDQU instructions
1725   // for Object Initialization as default
1726   if (is_amd() && cpu_family() >= 0x19) {
1727     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1728       UseFastStosb = false;
1729     }
1730   }
1731 
1732 #ifdef COMPILER2
1733   if (is_intel() && MaxVectorSize > 16) {
1734     if (FLAG_IS_DEFAULT(UseFastStosb)) {
1735       UseFastStosb = false;
1736     }
1737   }
1738 #endif
1739 
1740   // Use XMM/YMM MOVDQU instruction for Object Initialization
1741   if (!UseFastStosb && UseSSE >= 2 && UseUnalignedLoadStores) {
1742     if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1743       UseXMMForObjInit = true;
1744     }
1745   } else if (UseXMMForObjInit) {
1746     warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1747     FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1748   }
1749 
1750 #ifdef COMPILER2
1751   if (FLAG_IS_DEFAULT(AlignVector)) {
1752     // Modern processors allow misaligned memory operations for vectors.
1753     AlignVector = !UseUnalignedLoadStores;
1754   }
1755 #endif // COMPILER2
1756 
1757   if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1758     if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1759       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1760     } else if (!supports_sse() && supports_3dnow_prefetch()) {
1761       FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1762     }
1763   }
1764 
1765   // Allocation prefetch settings
1766   intx cache_line_size = prefetch_data_size();
1767   if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1768       (cache_line_size > AllocatePrefetchStepSize)) {
1769     FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1770   }
1771 
1772   if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1773     assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1774     if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1775       warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1776     }
1777     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1778   }
1779 
1780   if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1781     bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1782     FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1783   }
1784 
1785   if (is_intel() && cpu_family() == 6 && supports_sse3()) {
1786     if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1787         supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1788       FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1789     }
1790 #ifdef COMPILER2
1791     if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1792       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1793     }
1794 #endif
1795   }
1796 
1797   if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1798 #ifdef COMPILER2
1799     if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1800       FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1801     }
1802 #endif
1803   }
1804 
1805 #ifdef _LP64
1806   // Prefetch settings
1807 
1808   // Prefetch interval for gc copy/scan == 9 dcache lines.  Derived from
1809   // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1810   // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1811   // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1812 
1813   // gc copy/scan is disabled if prefetchw isn't supported, because
1814   // Prefetch::write emits an inlined prefetchw on Linux.
1815   // Do not use the 3dnow prefetchw instruction.  It isn't supported on em64t.
1816   // The used prefetcht0 instruction works for both amd64 and em64t.
1817 
1818   if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1819     FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1820   }
1821   if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1822     FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1823   }
1824 #endif
1825 
1826   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1827      (cache_line_size > ContendedPaddingWidth))
1828      ContendedPaddingWidth = cache_line_size;
1829 
1830   // This machine allows unaligned memory accesses
1831   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1832     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1833   }
1834 
1835 #ifndef PRODUCT
1836   if (log_is_enabled(Info, os, cpu)) {
1837     LogStream ls(Log(os, cpu)::info());
1838     outputStream* log = &ls;
1839     log->print_cr("Logical CPUs per core: %u",
1840                   logical_processors_per_package());
1841     log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1842     log->print("UseSSE=%d", (int) UseSSE);
1843     if (UseAVX > 0) {
1844       log->print("  UseAVX=%d", (int) UseAVX);
1845     }
1846     if (UseAES) {
1847       log->print("  UseAES=1");
1848     }
1849 #ifdef COMPILER2
1850     if (MaxVectorSize > 0) {
1851       log->print("  MaxVectorSize=%d", (int) MaxVectorSize);
1852     }
1853 #endif
1854     log->cr();
1855     log->print("Allocation");
1856     if (AllocatePrefetchStyle <= 0 || (UseSSE == 0 && !supports_3dnow_prefetch())) {
1857       log->print_cr(": no prefetching");
1858     } else {
1859       log->print(" prefetching: ");
1860       if (UseSSE == 0 && supports_3dnow_prefetch()) {
1861         log->print("PREFETCHW");
1862       } else if (UseSSE >= 1) {
1863         if (AllocatePrefetchInstr == 0) {
1864           log->print("PREFETCHNTA");
1865         } else if (AllocatePrefetchInstr == 1) {
1866           log->print("PREFETCHT0");
1867         } else if (AllocatePrefetchInstr == 2) {
1868           log->print("PREFETCHT2");
1869         } else if (AllocatePrefetchInstr == 3) {
1870           log->print("PREFETCHW");
1871         }
1872       }
1873       if (AllocatePrefetchLines > 1) {
1874         log->print_cr(" at distance %d, %d lines of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchLines, (int) AllocatePrefetchStepSize);
1875       } else {
1876         log->print_cr(" at distance %d, one line of %d bytes", (int) AllocatePrefetchDistance, (int) AllocatePrefetchStepSize);
1877       }
1878     }
1879 
1880     if (PrefetchCopyIntervalInBytes > 0) {
1881       log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1882     }
1883     if (PrefetchScanIntervalInBytes > 0) {
1884       log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1885     }
1886     if (ContendedPaddingWidth > 0) {
1887       log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1888     }
1889   }
1890 #endif // !PRODUCT
1891   if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1892       FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1893   }
1894   if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1895       FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1896   }
1897 }
1898 
1899 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1900   VirtualizationType vrt = VM_Version::get_detected_virtualization();
1901   if (vrt == XenHVM) {
1902     st->print_cr("Xen hardware-assisted virtualization detected");
1903   } else if (vrt == KVM) {
1904     st->print_cr("KVM virtualization detected");
1905   } else if (vrt == VMWare) {
1906     st->print_cr("VMWare virtualization detected");
1907     VirtualizationSupport::print_virtualization_info(st);
1908   } else if (vrt == HyperV) {
1909     st->print_cr("Hyper-V virtualization detected");
1910   } else if (vrt == HyperVRole) {
1911     st->print_cr("Hyper-V role detected");
1912   }
1913 }
1914 
1915 bool VM_Version::compute_has_intel_jcc_erratum() {
1916   if (!is_intel_family_core()) {
1917     // Only Intel CPUs are affected.
1918     return false;
1919   }
1920   // The following table of affected CPUs is based on the following document released by Intel:
1921   // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1922   switch (_model) {
1923   case 0x8E:
1924     // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1925     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1926     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1927     // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1928     // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1929     // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1930     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1931     // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1932     // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1933     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1934   case 0x4E:
1935     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1936     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1937     // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1938     return _stepping == 0x3;
1939   case 0x55:
1940     // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1941     // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1942     // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1943     // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1944     // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1945     // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1946     return _stepping == 0x4 || _stepping == 0x7;
1947   case 0x5E:
1948     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1949     // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1950     return _stepping == 0x3;
1951   case 0x9E:
1952     // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1953     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1954     // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1955     // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1956     // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1957     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1958     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1959     // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1960     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1961     // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1962     // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1963     // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1964     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1965     // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1966     return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1967   case 0xA5:
1968     // Not in Intel documentation.
1969     // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1970     return true;
1971   case 0xA6:
1972     // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1973     return _stepping == 0x0;
1974   case 0xAE:
1975     // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1976     return _stepping == 0xA;
1977   default:
1978     // If we are running on another intel machine not recognized in the table, we are okay.
1979     return false;
1980   }
1981 }
1982 
1983 // On Xen, the cpuid instruction returns
1984 //  eax / registers[0]: Version of Xen
1985 //  ebx / registers[1]: chars 'XenV'
1986 //  ecx / registers[2]: chars 'MMXe'
1987 //  edx / registers[3]: chars 'nVMM'
1988 //
1989 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
1990 //  ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
1991 //  ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
1992 //  edx / registers[3]: chars 'M'    / 'ware' / 't Hv'
1993 //
1994 // more information :
1995 // https://kb.vmware.com/s/article/1009458
1996 //
1997 void VM_Version::check_virtualizations() {
1998   uint32_t registers[4] = {0};
1999   char signature[13] = {0};
2000 
2001   // Xen cpuid leaves can be found 0x100 aligned boundary starting
2002   // from 0x40000000 until 0x40010000.
2003   //   https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2004   for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2005     detect_virt_stub(leaf, registers);
2006     memcpy(signature, &registers[1], 12);
2007 
2008     if (strncmp("VMwareVMware", signature, 12) == 0) {
2009       Abstract_VM_Version::_detected_virtualization = VMWare;
2010       // check for extended metrics from guestlib
2011       VirtualizationSupport::initialize();
2012     } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2013       Abstract_VM_Version::_detected_virtualization = HyperV;
2014 #ifdef _WINDOWS
2015       // CPUID leaf 0x40000007 is available to the root partition only.
2016       // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2017       //   https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2018       detect_virt_stub(0x40000007, registers);
2019       if ((registers[0] != 0x0) ||
2020           (registers[1] != 0x0) ||
2021           (registers[2] != 0x0) ||
2022           (registers[3] != 0x0)) {
2023         Abstract_VM_Version::_detected_virtualization = HyperVRole;
2024       }
2025 #endif
2026     } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2027       Abstract_VM_Version::_detected_virtualization = KVM;
2028     } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2029       Abstract_VM_Version::_detected_virtualization = XenHVM;
2030     }
2031   }
2032 }
2033 
2034 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2035 // for implementing the array copy and clear operations.
2036 // The Intel platforms that supports the serialize instruction
2037 // has improved implementation of 64-byte load/stores and so the default
2038 // threshold is set to 0 for these platforms.
2039 int VM_Version::avx3_threshold() {
2040   return (is_intel_family_core() &&
2041           supports_serialize() &&
2042           FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2043 }
2044 
2045 static bool _vm_version_initialized = false;
2046 
2047 void VM_Version::initialize() {
2048   ResourceMark rm;
2049   // Making this stub must be FIRST use of assembler
2050   stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2051   if (stub_blob == NULL) {
2052     vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2053   }
2054   CodeBuffer c(stub_blob);
2055   VM_Version_StubGenerator g(&c);
2056 
2057   get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2058                                      g.generate_get_cpu_info());
2059   detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2060                                      g.generate_detect_virt());
2061 
2062   get_processor_features();
2063 
2064   LP64_ONLY(Assembler::precompute_instructions();)
2065 
2066   if (VM_Version::supports_hv()) { // Supports hypervisor
2067     check_virtualizations();
2068   }
2069   _vm_version_initialized = true;
2070 }
2071 
2072 typedef enum {
2073    CPU_FAMILY_8086_8088  = 0,
2074    CPU_FAMILY_INTEL_286  = 2,
2075    CPU_FAMILY_INTEL_386  = 3,
2076    CPU_FAMILY_INTEL_486  = 4,
2077    CPU_FAMILY_PENTIUM    = 5,
2078    CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
2079    CPU_FAMILY_PENTIUM_4  = 0xF
2080 } FamilyFlag;
2081 
2082 typedef enum {
2083   RDTSCP_FLAG  = 0x08000000, // bit 27
2084   INTEL64_FLAG = 0x20000000  // bit 29
2085 } _featureExtendedEdxFlag;
2086 
2087 typedef enum {
2088    FPU_FLAG     = 0x00000001,
2089    VME_FLAG     = 0x00000002,
2090    DE_FLAG      = 0x00000004,
2091    PSE_FLAG     = 0x00000008,
2092    TSC_FLAG     = 0x00000010,
2093    MSR_FLAG     = 0x00000020,
2094    PAE_FLAG     = 0x00000040,
2095    MCE_FLAG     = 0x00000080,
2096    CX8_FLAG     = 0x00000100,
2097    APIC_FLAG    = 0x00000200,
2098    SEP_FLAG     = 0x00000800,
2099    MTRR_FLAG    = 0x00001000,
2100    PGE_FLAG     = 0x00002000,
2101    MCA_FLAG     = 0x00004000,
2102    CMOV_FLAG    = 0x00008000,
2103    PAT_FLAG     = 0x00010000,
2104    PSE36_FLAG   = 0x00020000,
2105    PSNUM_FLAG   = 0x00040000,
2106    CLFLUSH_FLAG = 0x00080000,
2107    DTS_FLAG     = 0x00200000,
2108    ACPI_FLAG    = 0x00400000,
2109    MMX_FLAG     = 0x00800000,
2110    FXSR_FLAG    = 0x01000000,
2111    SSE_FLAG     = 0x02000000,
2112    SSE2_FLAG    = 0x04000000,
2113    SS_FLAG      = 0x08000000,
2114    HTT_FLAG     = 0x10000000,
2115    TM_FLAG      = 0x20000000
2116 } FeatureEdxFlag;
2117 
2118 static BufferBlob* cpuid_brand_string_stub_blob;
2119 static const int   cpuid_brand_string_stub_size = 550;
2120 
2121 extern "C" {
2122   typedef void (*getCPUIDBrandString_stub_t)(void*);
2123 }
2124 
2125 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = NULL;
2126 
2127 // VM_Version statics
2128 enum {
2129   ExtendedFamilyIdLength_INTEL = 16,
2130   ExtendedFamilyIdLength_AMD   = 24
2131 };
2132 
2133 const size_t VENDOR_LENGTH = 13;
2134 const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
2135 static char* _cpu_brand_string = NULL;
2136 static int64_t _max_qualified_cpu_frequency = 0;
2137 
2138 static int _no_of_threads = 0;
2139 static int _no_of_cores = 0;
2140 
2141 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2142   "8086/8088",
2143   "",
2144   "286",
2145   "386",
2146   "486",
2147   "Pentium",
2148   "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
2149   "",
2150   "",
2151   "",
2152   "",
2153   "",
2154   "",
2155   "",
2156   "",
2157   "Pentium 4"
2158 };
2159 
2160 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2161   "",
2162   "",
2163   "",
2164   "",
2165   "5x86",
2166   "K5/K6",
2167   "Athlon/AthlonXP",
2168   "",
2169   "",
2170   "",
2171   "",
2172   "",
2173   "",
2174   "",
2175   "",
2176   "Opteron/Athlon64",
2177   "Opteron QC/Phenom",  // Barcelona et.al.
2178   "",
2179   "",
2180   "",
2181   "",
2182   "",
2183   "",
2184   "Zen"
2185 };
2186 // Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
2187 // September 2013, Vol 3C Table 35-1
2188 const char* const _model_id_pentium_pro[] = {
2189   "",
2190   "Pentium Pro",
2191   "",
2192   "Pentium II model 3",
2193   "",
2194   "Pentium II model 5/Xeon/Celeron",
2195   "Celeron",
2196   "Pentium III/Pentium III Xeon",
2197   "Pentium III/Pentium III Xeon",
2198   "Pentium M model 9",    // Yonah
2199   "Pentium III, model A",
2200   "Pentium III, model B",
2201   "",
2202   "Pentium M model D",    // Dothan
2203   "",
2204   "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
2205   "",
2206   "",
2207   "",
2208   "",
2209   "",
2210   "",
2211   "Celeron",              // 0x16 Celeron 65nm
2212   "Core 2",               // 0x17 Penryn / Harpertown
2213   "",
2214   "",
2215   "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
2216   "Atom",                 // 0x1B Z5xx series Silverthorn
2217   "",
2218   "Core 2",               // 0x1D Dunnington (6-core)
2219   "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
2220   "",
2221   "",
2222   "",
2223   "",
2224   "",
2225   "",
2226   "Westmere",             // 0x25 CPU_MODEL_WESTMERE
2227   "",
2228   "",
2229   "",                     // 0x28
2230   "",
2231   "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
2232   "",
2233   "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
2234   "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
2235   "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
2236   "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
2237   "",
2238   "",
2239   "",
2240   "",
2241   "",
2242   "",
2243   "",
2244   "",
2245   "",
2246   "",
2247   "Ivy Bridge",           // 0x3a
2248   "",
2249   "Haswell",              // 0x3c "4th Generation Intel Core Processor"
2250   "",                     // 0x3d "Next Generation Intel Core Processor"
2251   "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
2252   "",                     // 0x3f "Future Generation Intel Xeon Processor"
2253   "",
2254   "",
2255   "",
2256   "",
2257   "",
2258   "Haswell",              // 0x45 "4th Generation Intel Core Processor"
2259   "Haswell",              // 0x46 "4th Generation Intel Core Processor"
2260   NULL
2261 };
2262 
2263 /* Brand ID is for back compatibility
2264  * Newer CPUs uses the extended brand string */
2265 const char* const _brand_id[] = {
2266   "",
2267   "Celeron processor",
2268   "Pentium III processor",
2269   "Intel Pentium III Xeon processor",
2270   "",
2271   "",
2272   "",
2273   "",
2274   "Intel Pentium 4 processor",
2275   NULL
2276 };
2277 
2278 
2279 const char* const _feature_edx_id[] = {
2280   "On-Chip FPU",
2281   "Virtual Mode Extensions",
2282   "Debugging Extensions",
2283   "Page Size Extensions",
2284   "Time Stamp Counter",
2285   "Model Specific Registers",
2286   "Physical Address Extension",
2287   "Machine Check Exceptions",
2288   "CMPXCHG8B Instruction",
2289   "On-Chip APIC",
2290   "",
2291   "Fast System Call",
2292   "Memory Type Range Registers",
2293   "Page Global Enable",
2294   "Machine Check Architecture",
2295   "Conditional Mov Instruction",
2296   "Page Attribute Table",
2297   "36-bit Page Size Extension",
2298   "Processor Serial Number",
2299   "CLFLUSH Instruction",
2300   "",
2301   "Debug Trace Store feature",
2302   "ACPI registers in MSR space",
2303   "Intel Architecture MMX Technology",
2304   "Fast Float Point Save and Restore",
2305   "Streaming SIMD extensions",
2306   "Streaming SIMD extensions 2",
2307   "Self-Snoop",
2308   "Hyper Threading",
2309   "Thermal Monitor",
2310   "",
2311   "Pending Break Enable"
2312 };
2313 
2314 const char* const _feature_extended_edx_id[] = {
2315   "",
2316   "",
2317   "",
2318   "",
2319   "",
2320   "",
2321   "",
2322   "",
2323   "",
2324   "",
2325   "",
2326   "SYSCALL/SYSRET",
2327   "",
2328   "",
2329   "",
2330   "",
2331   "",
2332   "",
2333   "",
2334   "",
2335   "Execute Disable Bit",
2336   "",
2337   "",
2338   "",
2339   "",
2340   "",
2341   "",
2342   "RDTSCP",
2343   "",
2344   "Intel 64 Architecture",
2345   "",
2346   ""
2347 };
2348 
2349 const char* const _feature_ecx_id[] = {
2350   "Streaming SIMD Extensions 3",
2351   "PCLMULQDQ",
2352   "64-bit DS Area",
2353   "MONITOR/MWAIT instructions",
2354   "CPL Qualified Debug Store",
2355   "Virtual Machine Extensions",
2356   "Safer Mode Extensions",
2357   "Enhanced Intel SpeedStep technology",
2358   "Thermal Monitor 2",
2359   "Supplemental Streaming SIMD Extensions 3",
2360   "L1 Context ID",
2361   "",
2362   "Fused Multiply-Add",
2363   "CMPXCHG16B",
2364   "xTPR Update Control",
2365   "Perfmon and Debug Capability",
2366   "",
2367   "Process-context identifiers",
2368   "Direct Cache Access",
2369   "Streaming SIMD extensions 4.1",
2370   "Streaming SIMD extensions 4.2",
2371   "x2APIC",
2372   "MOVBE",
2373   "Popcount instruction",
2374   "TSC-Deadline",
2375   "AESNI",
2376   "XSAVE",
2377   "OSXSAVE",
2378   "AVX",
2379   "F16C",
2380   "RDRAND",
2381   ""
2382 };
2383 
2384 const char* const _feature_extended_ecx_id[] = {
2385   "LAHF/SAHF instruction support",
2386   "Core multi-processor legacy mode",
2387   "",
2388   "",
2389   "",
2390   "Advanced Bit Manipulations: LZCNT",
2391   "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
2392   "Misaligned SSE mode",
2393   "",
2394   "",
2395   "",
2396   "",
2397   "",
2398   "",
2399   "",
2400   "",
2401   "",
2402   "",
2403   "",
2404   "",
2405   "",
2406   "",
2407   "",
2408   "",
2409   "",
2410   "",
2411   "",
2412   "",
2413   "",
2414   "",
2415   "",
2416   ""
2417 };
2418 
2419 void VM_Version::initialize_tsc(void) {
2420   ResourceMark rm;
2421 
2422   cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
2423   if (cpuid_brand_string_stub_blob == NULL) {
2424     vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
2425   }
2426   CodeBuffer c(cpuid_brand_string_stub_blob);
2427   VM_Version_StubGenerator g(&c);
2428   getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2429                                    g.generate_getCPUIDBrandString());
2430 }
2431 
2432 const char* VM_Version::cpu_model_description(void) {
2433   uint32_t cpu_family = extended_cpu_family();
2434   uint32_t cpu_model = extended_cpu_model();
2435   const char* model = NULL;
2436 
2437   if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2438     for (uint32_t i = 0; i <= cpu_model; i++) {
2439       model = _model_id_pentium_pro[i];
2440       if (model == NULL) {
2441         break;
2442       }
2443     }
2444   }
2445   return model;
2446 }
2447 
2448 const char* VM_Version::cpu_brand_string(void) {
2449   if (_cpu_brand_string == NULL) {
2450     _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2451     if (NULL == _cpu_brand_string) {
2452       return NULL;
2453     }
2454     int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2455     if (ret_val != OS_OK) {
2456       FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2457       _cpu_brand_string = NULL;
2458     }
2459   }
2460   return _cpu_brand_string;
2461 }
2462 
2463 const char* VM_Version::cpu_brand(void) {
2464   const char*  brand  = NULL;
2465 
2466   if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2467     int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2468     brand = _brand_id[0];
2469     for (int i = 0; brand != NULL && i <= brand_num; i += 1) {
2470       brand = _brand_id[i];
2471     }
2472   }
2473   return brand;
2474 }
2475 
2476 bool VM_Version::cpu_is_em64t(void) {
2477   return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2478 }
2479 
2480 bool VM_Version::is_netburst(void) {
2481   return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2482 }
2483 
2484 bool VM_Version::supports_tscinv_ext(void) {
2485   if (!supports_tscinv_bit()) {
2486     return false;
2487   }
2488 
2489   if (is_intel()) {
2490     return true;
2491   }
2492 
2493   if (is_amd()) {
2494     return !is_amd_Barcelona();
2495   }
2496 
2497   if (is_hygon()) {
2498     return true;
2499   }
2500 
2501   return false;
2502 }
2503 
2504 void VM_Version::resolve_cpu_information_details(void) {
2505 
2506   // in future we want to base this information on proper cpu
2507   // and cache topology enumeration such as:
2508   // Intel 64 Architecture Processor Topology Enumeration
2509   // which supports system cpu and cache topology enumeration
2510   // either using 2xAPICIDs or initial APICIDs
2511 
2512   // currently only rough cpu information estimates
2513   // which will not necessarily reflect the exact configuration of the system
2514 
2515   // this is the number of logical hardware threads
2516   // visible to the operating system
2517   _no_of_threads = os::processor_count();
2518 
2519   // find out number of threads per cpu package
2520   int threads_per_package = threads_per_core() * cores_per_cpu();
2521 
2522   // use amount of threads visible to the process in order to guess number of sockets
2523   _no_of_sockets = _no_of_threads / threads_per_package;
2524 
2525   // process might only see a subset of the total number of threads
2526   // from a single processor package. Virtualization/resource management for example.
2527   // If so then just write a hard 1 as num of pkgs.
2528   if (0 == _no_of_sockets) {
2529     _no_of_sockets = 1;
2530   }
2531 
2532   // estimate the number of cores
2533   _no_of_cores = cores_per_cpu() * _no_of_sockets;
2534 }
2535 
2536 
2537 const char* VM_Version::cpu_family_description(void) {
2538   int cpu_family_id = extended_cpu_family();
2539   if (is_amd()) {
2540     if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2541       return _family_id_amd[cpu_family_id];
2542     }
2543   }
2544   if (is_intel()) {
2545     if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2546       return cpu_model_description();
2547     }
2548     if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2549       return _family_id_intel[cpu_family_id];
2550     }
2551   }
2552   if (is_hygon()) {
2553     return "Dhyana";
2554   }
2555   return "Unknown x86";
2556 }
2557 
2558 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2559   assert(buf != NULL, "buffer is NULL!");
2560   assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2561 
2562   const char* cpu_type = NULL;
2563   const char* x64 = NULL;
2564 
2565   if (is_intel()) {
2566     cpu_type = "Intel";
2567     x64 = cpu_is_em64t() ? " Intel64" : "";
2568   } else if (is_amd()) {
2569     cpu_type = "AMD";
2570     x64 = cpu_is_em64t() ? " AMD64" : "";
2571   } else if (is_hygon()) {
2572     cpu_type = "Hygon";
2573     x64 = cpu_is_em64t() ? " AMD64" : "";
2574   } else {
2575     cpu_type = "Unknown x86";
2576     x64 = cpu_is_em64t() ? " x86_64" : "";
2577   }
2578 
2579   jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2580     cpu_type,
2581     cpu_family_description(),
2582     supports_ht() ? " (HT)" : "",
2583     supports_sse3() ? " SSE3" : "",
2584     supports_ssse3() ? " SSSE3" : "",
2585     supports_sse4_1() ? " SSE4.1" : "",
2586     supports_sse4_2() ? " SSE4.2" : "",
2587     supports_sse4a() ? " SSE4A" : "",
2588     is_netburst() ? " Netburst" : "",
2589     is_intel_family_core() ? " Core" : "",
2590     x64);
2591 
2592   return OS_OK;
2593 }
2594 
2595 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2596   assert(buf != NULL, "buffer is NULL!");
2597   assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2598   assert(getCPUIDBrandString_stub != NULL, "not initialized");
2599 
2600   // invoke newly generated asm code to fetch CPU Brand String
2601   getCPUIDBrandString_stub(&_cpuid_info);
2602 
2603   // fetch results into buffer
2604   *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
2605   *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
2606   *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
2607   *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2608   *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2609   *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2610   *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2611   *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2612   *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2613   *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2614   *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2615   *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2616 
2617   return OS_OK;
2618 }
2619 
2620 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2621   guarantee(buf != NULL, "buffer is NULL!");
2622   guarantee(buf_len > 0, "buffer len not enough!");
2623 
2624   unsigned int flag = 0;
2625   unsigned int fi = 0;
2626   size_t       written = 0;
2627   const char*  prefix = "";
2628 
2629 #define WRITE_TO_BUF(string)                                                          \
2630   {                                                                                   \
2631     int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2632     if (res < 0) {                                                                    \
2633       return buf_len - 1;                                                             \
2634     }                                                                                 \
2635     written += res;                                                                   \
2636     if (prefix[0] == '\0') {                                                          \
2637       prefix = ", ";                                                                  \
2638     }                                                                                 \
2639   }
2640 
2641   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2642     if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2643       continue; /* no hyperthreading */
2644     } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2645       continue; /* no fast system call */
2646     }
2647     if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2648       WRITE_TO_BUF(_feature_edx_id[fi]);
2649     }
2650   }
2651 
2652   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2653     if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2654       WRITE_TO_BUF(_feature_ecx_id[fi]);
2655     }
2656   }
2657 
2658   for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2659     if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2660       WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2661     }
2662   }
2663 
2664   for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2665     if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2666       WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2667     }
2668   }
2669 
2670   if (supports_tscinv_bit()) {
2671       WRITE_TO_BUF("Invariant TSC");
2672   }
2673 
2674   return written;
2675 }
2676 
2677 /**
2678  * Write a detailed description of the cpu to a given buffer, including
2679  * feature set.
2680  */
2681 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2682   assert(buf != NULL, "buffer is NULL!");
2683   assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2684 
2685   static const char* unknown = "<unknown>";
2686   char               vendor_id[VENDOR_LENGTH];
2687   const char*        family = NULL;
2688   const char*        model = NULL;
2689   const char*        brand = NULL;
2690   int                outputLen = 0;
2691 
2692   family = cpu_family_description();
2693   if (family == NULL) {
2694     family = unknown;
2695   }
2696 
2697   model = cpu_model_description();
2698   if (model == NULL) {
2699     model = unknown;
2700   }
2701 
2702   brand = cpu_brand_string();
2703 
2704   if (brand == NULL) {
2705     brand = cpu_brand();
2706     if (brand == NULL) {
2707       brand = unknown;
2708     }
2709   }
2710 
2711   *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2712   *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2713   *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2714   vendor_id[VENDOR_LENGTH-1] = '\0';
2715 
2716   outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2717     "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2718     "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2719     "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2720     "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2721     "Supports: ",
2722     brand,
2723     vendor_id,
2724     family,
2725     extended_cpu_family(),
2726     model,
2727     extended_cpu_model(),
2728     cpu_stepping(),
2729     _cpuid_info.std_cpuid1_eax.bits.ext_family,
2730     _cpuid_info.std_cpuid1_eax.bits.ext_model,
2731     _cpuid_info.std_cpuid1_eax.bits.proc_type,
2732     _cpuid_info.std_cpuid1_eax.value,
2733     _cpuid_info.std_cpuid1_ebx.value,
2734     _cpuid_info.std_cpuid1_ecx.value,
2735     _cpuid_info.std_cpuid1_edx.value,
2736     _cpuid_info.ext_cpuid1_eax,
2737     _cpuid_info.ext_cpuid1_ebx,
2738     _cpuid_info.ext_cpuid1_ecx,
2739     _cpuid_info.ext_cpuid1_edx);
2740 
2741   if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2742     if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2743     return OS_ERR;
2744   }
2745 
2746   cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2747 
2748   return OS_OK;
2749 }
2750 
2751 
2752 // Fill in Abstract_VM_Version statics
2753 void VM_Version::initialize_cpu_information() {
2754   assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2755   assert(!_initialized, "shouldn't be initialized yet");
2756   resolve_cpu_information_details();
2757 
2758   // initialize cpu_name and cpu_desc
2759   cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2760   cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2761   _initialized = true;
2762 }
2763 
2764 /**
2765  *  For information about extracting the frequency from the cpu brand string, please see:
2766  *
2767  *    Intel Processor Identification and the CPUID Instruction
2768  *    Application Note 485
2769  *    May 2012
2770  *
2771  * The return value is the frequency in Hz.
2772  */
2773 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2774   const char* const brand_string = cpu_brand_string();
2775   if (brand_string == NULL) {
2776     return 0;
2777   }
2778   const int64_t MEGA = 1000000;
2779   int64_t multiplier = 0;
2780   int64_t frequency = 0;
2781   uint8_t idx = 0;
2782   // The brand string buffer is at most 48 bytes.
2783   // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2784   for (; idx < 48-2; ++idx) {
2785     // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2786     // Search brand string for "yHz" where y is M, G, or T.
2787     if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2788       if (brand_string[idx] == 'M') {
2789         multiplier = MEGA;
2790       } else if (brand_string[idx] == 'G') {
2791         multiplier = MEGA * 1000;
2792       } else if (brand_string[idx] == 'T') {
2793         multiplier = MEGA * MEGA;
2794       }
2795       break;
2796     }
2797   }
2798   if (multiplier > 0) {
2799     // Compute frequency (in Hz) from brand string.
2800     if (brand_string[idx-3] == '.') { // if format is "x.xx"
2801       frequency =  (brand_string[idx-4] - '0') * multiplier;
2802       frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2803       frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2804     } else { // format is "xxxx"
2805       frequency =  (brand_string[idx-4] - '0') * 1000;
2806       frequency += (brand_string[idx-3] - '0') * 100;
2807       frequency += (brand_string[idx-2] - '0') * 10;
2808       frequency += (brand_string[idx-1] - '0');
2809       frequency *= multiplier;
2810     }
2811   }
2812   return frequency;
2813 }
2814 
2815 
2816 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2817   if (_max_qualified_cpu_frequency == 0) {
2818     _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2819   }
2820   return _max_qualified_cpu_frequency;
2821 }
2822