1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CPU_X86_VM_VERSION_X86_HPP
26 #define CPU_X86_VM_VERSION_X86_HPP
27
28 #include "runtime/abstract_vm_version.hpp"
29 #include "utilities/debug.hpp"
30 #include "utilities/macros.hpp"
31 #include "utilities/sizes.hpp"
32
33 class stringStream;
34
35 class VM_Version : public Abstract_VM_Version {
36 friend class VMStructs;
37 friend class JVMCIVMStructs;
38
39 public:
40 // cpuid result register layouts. These are all unions of a uint32_t
41 // (in case anyone wants access to the register as a whole) and a bitfield.
42
// cpuid function 1, EAX: stepping / model / family identification fields.
union StdCpuid1Eax {
  uint32_t value;               // the register as a whole
  struct {
    uint32_t stepping   : 4;
    uint32_t model      : 4;
    uint32_t family     : 4;
    uint32_t proc_type  : 2;
    uint32_t            : 2;    // unnamed padding
    uint32_t ext_model  : 4;
    uint32_t ext_family : 8;
    uint32_t            : 4;    // unnamed padding
  } bits;
};
56
// cpuid function 1, EBX (example, unused).
union StdCpuid1Ebx {
  uint32_t value;                   // the register as a whole
  struct {
    uint32_t brand_id        : 8;
    uint32_t clflush_size    : 8;
    uint32_t threads_per_cpu : 8;
    uint32_t apic_id         : 8;
  } bits;
};
66
// cpuid function 1, ECX: feature bits. Unnamed fields only keep the named
// ones at their positions.
union StdCpuid1Ecx {
  uint32_t value;            // the register as a whole
  struct {
    uint32_t sse3      : 1;
    uint32_t clmul     : 1;
    uint32_t           : 1;
    uint32_t monitor   : 1;
    uint32_t           : 1;
    uint32_t vmx       : 1;
    uint32_t           : 1;
    uint32_t est       : 1;
    uint32_t           : 1;
    uint32_t ssse3     : 1;
    uint32_t cid       : 1;
    uint32_t           : 1;
    uint32_t fma       : 1;
    uint32_t cmpxchg16 : 1;
    uint32_t           : 4;
    uint32_t dca       : 1;
    uint32_t sse4_1    : 1;
    uint32_t sse4_2    : 1;
    uint32_t           : 2;
    uint32_t popcnt    : 1;
    uint32_t           : 1;
    uint32_t aes       : 1;
    uint32_t           : 1;
    uint32_t osxsave   : 1;
    uint32_t avx       : 1;
    uint32_t f16c      : 1;
    uint32_t           : 1;
    uint32_t hv        : 1;
  } bits;
};
100
// cpuid function 1, EDX: feature bits.
union StdCpuid1Edx {
  uint32_t value;           // the register as a whole
  struct {
    uint32_t          : 4;
    uint32_t tsc      : 1;
    uint32_t          : 3;
    uint32_t cmpxchg8 : 1;
    uint32_t          : 6;
    uint32_t cmov     : 1;
    uint32_t          : 3;
    uint32_t clflush  : 1;
    uint32_t          : 3;
    uint32_t mmx      : 1;
    uint32_t fxsr     : 1;
    uint32_t sse      : 1;
    uint32_t sse2     : 1;
    uint32_t          : 1;
    uint32_t ht       : 1;
    uint32_t          : 3;
  } bits;
};
122
// cpuid function 4 (deterministic cache parameters), EAX.
union DcpCpuid4Eax {
  uint32_t value;                 // the register as a whole
  struct {
    uint32_t cache_type    : 5;
    uint32_t               : 21;
    uint32_t cores_per_cpu : 6;
  } bits;
};
131
// cpuid function 4 (deterministic cache parameters), EBX.
union DcpCpuid4Ebx {
  uint32_t value;                 // the register as a whole
  struct {
    uint32_t L1_line_size  : 12;
    uint32_t partitions    : 10;
    uint32_t associativity : 10;
  } bits;
};
140
// cpuid function 0xB (processor topology), EBX.
union TplCpuidBEbx {
  uint32_t value;                // the register as a whole
  struct {
    uint32_t logical_cpus : 16;
    uint32_t              : 16;
  } bits;
};
148
// cpuid function 0x80000001, ECX: extended feature bits.
union ExtCpuid1Ecx {
  uint32_t value;              // the register as a whole
  struct {
    uint32_t LahfSahf    : 1;
    uint32_t CmpLegacy   : 1;
    uint32_t             : 3;
    uint32_t lzcnt       : 1;
    uint32_t sse4a       : 1;
    uint32_t misalignsse : 1;
    uint32_t prefetchw   : 1;
    uint32_t             : 23;
  } bits;
};
162
// cpuid function 0x80000001, EDX: extended feature bits.
union ExtCpuid1Edx {
  uint32_t value;            // the register as a whole
  struct {
    uint32_t           : 22;
    uint32_t mmx_amd   : 1;
    uint32_t mmx       : 1;
    uint32_t fxsr      : 1;
    uint32_t fxsr_opt  : 1;
    uint32_t pdpe1gb   : 1;
    uint32_t rdtscp    : 1;
    uint32_t           : 1;
    uint32_t long_mode : 1;
    uint32_t tdnow2    : 1;
    uint32_t tdnow     : 1;
  } bits;
};
179
// cpuid function 0x80000005, ECX/EDX: L1 cache description, one byte per field.
union ExtCpuid5Ex {
  uint32_t value;               // the register as a whole
  struct {
    uint32_t L1_line_size : 8;
    uint32_t L1_tag_lines : 8;
    uint32_t L1_assoc     : 8;
    uint32_t L1_size      : 8;
  } bits;
};
189
// cpuid function 0x80000007, EDX: only the TSC invariance bit is named.
union ExtCpuid7Edx {
  uint32_t value;                 // the register as a whole
  struct {
    uint32_t                : 8;
    uint32_t tsc_invariance : 1;
    uint32_t                : 23;
  } bits;
};
198
// cpuid function 0x80000008, ECX.
union ExtCpuid8Ecx {
  uint32_t value;                  // the register as a whole
  struct {
    uint32_t threads_per_cpu : 8;
    uint32_t                 : 24;
  } bits;
};
206
// cpuid function 7 (eax = 7, ecx = 0), EAX. Only the raw register value is
// exposed; no bit-fields are declared for this register.
union SefCpuid7Eax {
  uint32_t value;
};
210
// cpuid function 7 (eax = 7, ecx = 0), EBX: structured extended feature bits.
union SefCpuid7Ebx {
  uint32_t value;             // the register as a whole
  struct {
    uint32_t fsgsbase   : 1;
    uint32_t            : 2;
    uint32_t bmi1       : 1;
    uint32_t            : 1;
    uint32_t avx2       : 1;
    uint32_t            : 2;
    uint32_t bmi2       : 1;
    uint32_t erms       : 1;
    uint32_t            : 1;
    uint32_t rtm        : 1;
    uint32_t            : 4;
    uint32_t avx512f    : 1;
    uint32_t avx512dq   : 1;
    uint32_t            : 1;
    uint32_t adx        : 1;
    uint32_t            : 1;
    uint32_t avx512ifma : 1;
    uint32_t            : 1;
    uint32_t clflushopt : 1;
    uint32_t clwb       : 1;
    uint32_t            : 1;
    uint32_t avx512pf   : 1;
    uint32_t avx512er   : 1;
    uint32_t avx512cd   : 1;
    uint32_t sha        : 1;
    uint32_t avx512bw   : 1;
    uint32_t avx512vl   : 1;
  } bits;
};
243
// cpuid function 7 (eax = 7, ecx = 0), ECX: structured extended feature bits.
union SefCpuid7Ecx {
  uint32_t value;                    // the register as a whole
  struct {
    uint32_t prefetchwt1       : 1;
    uint32_t avx512_vbmi       : 1;
    uint32_t umip              : 1;
    uint32_t pku               : 1;
    uint32_t ospke             : 1;
    uint32_t                   : 1;
    uint32_t avx512_vbmi2      : 1;
    uint32_t cet_ss            : 1;
    uint32_t gfni              : 1;
    uint32_t vaes              : 1;
    uint32_t avx512_vpclmulqdq : 1;
    uint32_t avx512_vnni       : 1;
    uint32_t avx512_bitalg     : 1;
    uint32_t                   : 1;
    uint32_t avx512_vpopcntdq  : 1;
    uint32_t                   : 1;
    uint32_t                   : 1;
    uint32_t mawau             : 5;
    uint32_t rdpid             : 1;
    uint32_t                   : 9;
  } bits;
};
269
// cpuid function 7 (eax = 7, ecx = 0), EDX: structured extended feature bits.
union SefCpuid7Edx {
  uint32_t value;                     // the register as a whole
  struct {
    uint32_t                    : 2;
    uint32_t avx512_4vnniw      : 1;
    uint32_t avx512_4fmaps      : 1;
    uint32_t fast_short_rep_mov : 1;
    uint32_t                    : 9;
    uint32_t serialize          : 1;
    uint32_t hybrid             : 1;
    uint32_t                    : 4;
    uint32_t cet_ibt            : 1;
    uint32_t                    : 2;
    uint32_t avx512_fp16        : 1;
    uint32_t                    : 8;
  } bits;
};
287
// cpuid function 7, sub-leaf 1 (eax = 7, ecx = 1), EAX.
union SefCpuid7SubLeaf1Eax {
  uint32_t value;           // the register as a whole
  struct {
    uint32_t sha512   : 1;
    uint32_t          : 22;
    uint32_t avx_ifma : 1;
    uint32_t          : 8;
  } bits;
};
297
// cpuid function 7, sub-leaf 1 (eax = 7, ecx = 1), EDX.
union SefCpuid7SubLeaf1Edx {
  uint32_t value;        // the register as a whole
  struct {
    uint32_t       : 19;
    uint32_t avx10 : 1;
    uint32_t       : 1;
    uint32_t apx_f : 1;
    uint32_t       : 10;
  } bits;
};
308
// cpuid function 0x29 (eax = 0x29, ecx = 0): APX leaf register layout.
union StdCpuidEax29Ecx0 {
  uint32_t value;                 // the register as a whole
  struct {
    uint32_t apx_nci_ndd_nf : 1;
    uint32_t                : 31;
  } bits;
};
316
// cpuid function 24 main leaf (eax = 24, ecx = 0), EAX.
union StdCpuid24MainLeafEax {
  uint32_t value;                   // the register as a whole
  struct {
    uint32_t sub_leaves_cnt : 31;   // only 31 bits are declared; the last bit has no field
  } bits;
};
323
// cpuid function 24 main leaf (eax = 24, ecx = 0), EBX: AVX10 version and
// vector-length information.
union StdCpuid24MainLeafEbx {
  uint32_t value;                              // the register as a whole
  struct {
    uint32_t avx10_converged_isa_version : 8;
    uint32_t                             : 8;
    uint32_t                             : 2;
    uint32_t avx10_vlen_512              : 1;
    uint32_t                             : 13;
  } bits;
};
334
// cpuid function 0x8000001E, EBX.
union ExtCpuid1EEbx {
  uint32_t value;                   // the register as a whole
  struct {
    uint32_t                  : 8;
    uint32_t threads_per_core : 8;
    uint32_t                  : 16;
  } bits;
};
343
// Low 32 bits of the extended control register XCR0: one bit per state component.
union XemXcr0Eax {
  uint32_t value;          // the register as a whole
  struct {
    uint32_t x87     : 1;
    uint32_t sse     : 1;
    uint32_t ymm     : 1;
    uint32_t bndregs : 1;
    uint32_t bndcsr  : 1;
    uint32_t opmask  : 1;
    uint32_t zmm512  : 1;
    uint32_t zmm32   : 1;
    uint32_t         : 11;
    uint32_t apx_f   : 1;
    uint32_t         : 12;
  } bits;
};
360
361 protected:
362 static int _cpu;
363 static int _model;
364 static int _stepping;
365
366 static bool _has_intel_jcc_erratum;
367
368 static address _cpuinfo_segv_addr; // address of instruction which causes SEGV
369 static address _cpuinfo_cont_addr; // address of instruction after the one which causes SEGV
370 static address _cpuinfo_segv_addr_apx; // address of instruction which causes APX specific SEGV
371 static address _cpuinfo_cont_addr_apx; // address of instruction after the one which causes APX specific SEGV
372
373 /*
374 * Update following files when declaring new flags:
375 * test/lib-test/jdk/test/whitebox/CPUInfoTest.java
376 * src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java
377 */
enum Feature_Flag {
// Feature table in X-macro form. Each row is decl(id, name, bit): 'id' yields
// the enumerator CPU_<id>, 'name' is a string literal, 'bit' is the value
// assigned to the enumerator.
#define CPU_FEATURE_FLAGS(decl) \
    decl(CX8,               "cx8",               0)  /* next bits are from cpuid 1 (EDX) */ \
    decl(CMOV,              "cmov",              1)  \
    decl(FXSR,              "fxsr",              2)  \
    decl(HT,                "ht",                3)  \
                                                     \
    decl(MMX,               "mmx",               4)  \
    decl(3DNOW_PREFETCH,    "3dnowpref",         5)  /* Processor supports 3dnow prefetch and prefetchw instructions */ \
                                                     /* may not necessarily support other 3dnow instructions */ \
    decl(SSE,               "sse",               6)  \
    decl(SSE2,              "sse2",              7)  \
                                                     \
    decl(SSE3,              "sse3",              8)  /* SSE3 comes from cpuid 1 (ECX) */ \
    decl(SSSE3,             "ssse3",             9)  \
    decl(SSE4A,             "sse4a",             10) \
    decl(SSE4_1,            "sse4.1",            11) \
                                                     \
    decl(SSE4_2,            "sse4.2",            12) \
    decl(POPCNT,            "popcnt",            13) \
    decl(LZCNT,             "lzcnt",             14) \
    decl(TSC,               "tsc",               15) \
                                                     \
    decl(TSCINV_BIT,        "tscinvbit",         16) \
    decl(TSCINV,            "tscinv",            17) \
    decl(AVX,               "avx",               18) \
    decl(AVX2,              "avx2",              19) \
                                                     \
    decl(AES,               "aes",               20) \
    decl(ERMS,              "erms",              21) /* enhanced 'rep movsb/stosb' instructions */ \
    decl(CLMUL,             "clmul",             22) /* carryless multiply for CRC */ \
    decl(BMI1,              "bmi1",              23) \
                                                     \
    decl(BMI2,              "bmi2",              24) \
    decl(RTM,               "rtm",               25) /* Restricted Transactional Memory instructions */ \
    decl(ADX,               "adx",               26) \
    decl(AVX512F,           "avx512f",           27) /* AVX 512bit foundation instructions */ \
                                                     \
    decl(AVX512DQ,          "avx512dq",          28) \
    decl(AVX512PF,          "avx512pf",          29) \
    decl(AVX512ER,          "avx512er",          30) \
    decl(AVX512CD,          "avx512cd",          31) \
                                                     \
    decl(AVX512BW,          "avx512bw",          32) /* Byte and word vector instructions */ \
    decl(AVX512VL,          "avx512vl",          33) /* EVEX instructions with smaller vector length */ \
    decl(SHA,               "sha",               34) /* SHA instructions */ \
    decl(FMA,               "fma",               35) /* FMA instructions */ \
                                                     \
    decl(VZEROUPPER,        "vzeroupper",        36) /* Vzeroupper instruction */ \
    decl(AVX512_VPOPCNTDQ,  "avx512_vpopcntdq",  37) /* Vector popcount */ \
    decl(AVX512_VPCLMULQDQ, "avx512_vpclmulqdq", 38) /* Vector carryless multiplication */ \
    decl(AVX512_VAES,       "avx512_vaes",       39) /* Vector AES instruction */ \
                                                     \
    decl(AVX512_VNNI,       "avx512_vnni",       40) /* Vector Neural Network Instructions */ \
    decl(FLUSH,             "clflush",           41) /* flush instruction */ \
    decl(FLUSHOPT,          "clflushopt",        42) /* flushopt instruction */ \
    decl(CLWB,              "clwb",              43) /* clwb instruction */ \
                                                     \
    decl(AVX512_VBMI2,      "avx512_vbmi2",      44) /* VBMI2 shift left double instructions */ \
    decl(AVX512_VBMI,       "avx512_vbmi",       45) /* Vector BMI instructions */ \
    decl(HV,                "hv",                46) /* Hypervisor instructions */ \
    decl(SERIALIZE,         "serialize",         47) /* CPU SERIALIZE */ \
    decl(RDTSCP,            "rdtscp",            48) /* RDTSCP instruction */ \
    decl(RDPID,             "rdpid",             49) /* RDPID instruction */ \
    decl(FSRM,              "fsrm",              50) /* Fast Short REP MOV */ \
    decl(GFNI,              "gfni",              51) /* Vector GFNI instructions */ \
    decl(AVX512_BITALG,     "avx512_bitalg",     52) /* Vector sub-word popcount and bit gather instructions */ \
    decl(F16C,              "f16c",              53) /* Half-precision and single precision FP conversion instructions */ \
    decl(PKU,               "pku",               54) /* Protection keys for user-mode pages */ \
    decl(OSPKE,             "ospke",             55) /* OS enables protection keys */ \
    decl(CET_IBT,           "cet_ibt",           56) /* Control Flow Enforcement - Indirect Branch Tracking */ \
    decl(CET_SS,            "cet_ss",            57) /* Control Flow Enforcement - Shadow Stack */ \
    decl(AVX512_IFMA,       "avx512_ifma",       58) /* Integer Vector FMA instructions */ \
    decl(AVX_IFMA,          "avx_ifma",          59) /* 256-bit VEX-coded variant of AVX512-IFMA */ \
    decl(APX_F,             "apx_f",             60) /* Intel Advanced Performance Extensions */ \
    decl(SHA512,            "sha512",            61) /* SHA512 instructions */ \
    decl(AVX512_FP16,       "avx512_fp16",       62) /* AVX512 FP16 ISA support */ \
    decl(AVX10_1,           "avx10_1",           63) /* AVX10 512 bit vector ISA Version 1 support */ \
    decl(AVX10_2,           "avx10_2",           64) /* AVX10 512 bit vector ISA Version 2 support */ \
    decl(HYBRID,            "hybrid",            65) /* Hybrid architecture */

// Expand every table row into "CPU_<id> = <bit>,"; the string name is unused here.
#define CPU_FEATURE_ENUM_ENTRY(flag_id, flag_name, flag_bit) CPU_##flag_id = (flag_bit),
    CPU_FEATURE_FLAGS(CPU_FEATURE_ENUM_ENTRY)
#undef CPU_FEATURE_ENUM_ENTRY
    // One past the last declared feature.
    MAX_CPU_FEATURES
};
464
465 class VM_Features {
466 friend class VMStructs;
467 friend class JVMCIVMStructs;
468
469 private:
470 uint64_t _features_bitmap[(MAX_CPU_FEATURES / BitsPerLong) + 1];
471
472 STATIC_ASSERT(sizeof(_features_bitmap) * BitsPerByte >= MAX_CPU_FEATURES);
473
474 // Number of 8-byte elements in _bitmap.
475 constexpr static int features_bitmap_element_count() {
476 return sizeof(_features_bitmap) / sizeof(uint64_t);
477 }
478
479 constexpr static int features_bitmap_element_shift_count() {
480 return LogBitsPerLong;
481 }
482
483 constexpr static uint64_t features_bitmap_element_mask() {
484 return (1ULL << features_bitmap_element_shift_count()) - 1;
485 }
486
487 static int index(Feature_Flag feature) {
488 int idx = feature >> features_bitmap_element_shift_count();
489 assert(idx < features_bitmap_element_count(), "Features array index out of bounds");
490 return idx;
491 }
492
493 static uint64_t bit_mask(Feature_Flag feature) {
494 return (1ULL << (feature & features_bitmap_element_mask()));
495 }
496
497 static int _features_bitmap_size; // for JVMCI purposes
498 public:
499 VM_Features() {
500 for (int i = 0; i < features_bitmap_element_count(); i++) {
501 _features_bitmap[i] = 0;
502 }
503 }
504
505 void set_feature(Feature_Flag feature) {
506 int idx = index(feature);
507 _features_bitmap[idx] |= bit_mask(feature);
508 }
509
510 void clear_feature(VM_Version::Feature_Flag feature) {
511 int idx = index(feature);
512 _features_bitmap[idx] &= ~bit_mask(feature);
513 }
514
515 bool supports_feature(VM_Version::Feature_Flag feature) {
516 int idx = index(feature);
517 return (_features_bitmap[idx] & bit_mask(feature)) != 0;
518 }
519 };
520
521 // CPU feature flags vector, can be affected by VM settings.
522 static VM_Features _features;
523
524 // Original CPU feature flags vector, not affected by VM settings.
525 static VM_Features _cpu_features;
526
527 static const char* _features_names[];
528
529 static void clear_cpu_features() {
530 _features = VM_Features();
531 _cpu_features = VM_Features();
532 }
533
// CPU family and model identifiers used by the predicates of this class.
enum Extended_Family {
  // AMD families
  CPU_FAMILY_AMD_11H       = 0x11,
  CPU_FAMILY_AMD_17H       = 0x17,  // Zen1 & Zen2
  CPU_FAMILY_AMD_19H       = 0x19,  // Zen3 & Zen4

  // ZX families
  CPU_FAMILY_ZX_CORE_F6    = 0x06,
  CPU_FAMILY_ZX_CORE_F7    = 0x07,

  // Intel family
  CPU_FAMILY_INTEL_CORE    = 0x06,

  // Intel models
  CPU_MODEL_NEHALEM        = 0x1e,
  CPU_MODEL_NEHALEM_EP     = 0x1a,
  CPU_MODEL_NEHALEM_EX     = 0x2e,
  CPU_MODEL_WESTMERE       = 0x25,
  CPU_MODEL_WESTMERE_EP    = 0x2c,
  CPU_MODEL_WESTMERE_EX    = 0x2f,
  CPU_MODEL_SANDYBRIDGE    = 0x2a,
  CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
  CPU_MODEL_IVYBRIDGE_EP   = 0x3a,
  CPU_MODEL_HASWELL_E3     = 0x3c,
  CPU_MODEL_HASWELL_E7     = 0x3f,
  CPU_MODEL_BROADWELL      = 0x3d,
  CPU_MODEL_SKYLAKE        = 0x55
};
558
559 // cpuid information block. All info derived from executing cpuid with
560 // various function numbers is stored here. Intel and AMD info is
561 // merged in this block: accessor methods disentangle it.
562 //
563 // The info block is laid out in subblocks of 4 dwords corresponding to
564 // eax, ebx, ecx and edx, whether or not they contain anything useful.
565 class CpuidInfo {
566 public:
567 // cpuid function 0
568 uint32_t std_max_function;
569 uint32_t std_vendor_name_0;
570 uint32_t std_vendor_name_1;
571 uint32_t std_vendor_name_2;
572
573 // cpuid function 1
574 StdCpuid1Eax std_cpuid1_eax;
575 StdCpuid1Ebx std_cpuid1_ebx;
576 StdCpuid1Ecx std_cpuid1_ecx;
577 StdCpuid1Edx std_cpuid1_edx;
578
579 // cpuid function 4 (deterministic cache parameters)
580 DcpCpuid4Eax dcp_cpuid4_eax;
581 DcpCpuid4Ebx dcp_cpuid4_ebx;
582 uint32_t dcp_cpuid4_ecx; // unused currently
583 uint32_t dcp_cpuid4_edx; // unused currently
584
585 // cpuid function 7 (structured extended features enumeration leaf)
586 // eax = 7, ecx = 0
587 SefCpuid7Eax sef_cpuid7_eax;
588 SefCpuid7Ebx sef_cpuid7_ebx;
589 SefCpuid7Ecx sef_cpuid7_ecx;
590 SefCpuid7Edx sef_cpuid7_edx;
591
592 // cpuid function 7 (structured extended features enumeration sub-leaf 1)
593 // eax = 7, ecx = 1
594 SefCpuid7SubLeaf1Eax sefsl1_cpuid7_eax;
595 SefCpuid7SubLeaf1Edx sefsl1_cpuid7_edx;
596
597 // cpuid function 24 converged vector ISA main leaf
598 // eax = 24, ecx = 0
599 StdCpuid24MainLeafEax std_cpuid24_eax;
600 StdCpuid24MainLeafEbx std_cpuid24_ebx;
601
602 // cpuid function 0x29 APX Advanced Performance Extensions Leaf
603 // eax = 0x29, ecx = 0
604 StdCpuidEax29Ecx0 std_cpuid29_ebx;
605
606 // cpuid function 0xB (processor topology)
607 // ecx = 0
608 uint32_t tpl_cpuidB0_eax;
609 TplCpuidBEbx tpl_cpuidB0_ebx;
610 uint32_t tpl_cpuidB0_ecx; // unused currently
611 uint32_t tpl_cpuidB0_edx; // unused currently
612
613 // ecx = 1
614 uint32_t tpl_cpuidB1_eax;
615 TplCpuidBEbx tpl_cpuidB1_ebx;
616 uint32_t tpl_cpuidB1_ecx; // unused currently
617 uint32_t tpl_cpuidB1_edx; // unused currently
618
619 // ecx = 2
620 uint32_t tpl_cpuidB2_eax;
621 TplCpuidBEbx tpl_cpuidB2_ebx;
622 uint32_t tpl_cpuidB2_ecx; // unused currently
623 uint32_t tpl_cpuidB2_edx; // unused currently
624
625 // cpuid function 0x80000000 // example, unused
626 uint32_t ext_max_function;
627 uint32_t ext_vendor_name_0;
628 uint32_t ext_vendor_name_1;
629 uint32_t ext_vendor_name_2;
630
631 // cpuid function 0x80000001
632 uint32_t ext_cpuid1_eax; // reserved
633 uint32_t ext_cpuid1_ebx; // reserved
634 ExtCpuid1Ecx ext_cpuid1_ecx;
635 ExtCpuid1Edx ext_cpuid1_edx;
636
637 // cpuid functions 0x80000002 thru 0x80000004: example, unused
638 uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
639 uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
640 uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;
641
642 // cpuid function 0x80000005 // AMD L1, Intel reserved
643 uint32_t ext_cpuid5_eax; // unused currently
644 uint32_t ext_cpuid5_ebx; // reserved
645 ExtCpuid5Ex ext_cpuid5_ecx; // L1 data cache info (AMD)
646 ExtCpuid5Ex ext_cpuid5_edx; // L1 instruction cache info (AMD)
647
648 // cpuid function 0x80000007
649 uint32_t ext_cpuid7_eax; // reserved
650 uint32_t ext_cpuid7_ebx; // reserved
651 uint32_t ext_cpuid7_ecx; // reserved
652 ExtCpuid7Edx ext_cpuid7_edx; // tscinv
653
654 // cpuid function 0x80000008
655 uint32_t ext_cpuid8_eax; // unused currently
656 uint32_t ext_cpuid8_ebx; // reserved
657 ExtCpuid8Ecx ext_cpuid8_ecx;
658 uint32_t ext_cpuid8_edx; // reserved
659
660 // cpuid function 0x8000001E // AMD 17h
661 uint32_t ext_cpuid1E_eax;
662 ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
663 uint32_t ext_cpuid1E_ecx;
664 uint32_t ext_cpuid1E_edx; // unused currently
665
666 // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
667 XemXcr0Eax xem_xcr0_eax;
668 uint32_t xem_xcr0_edx; // reserved
669
670 // Space to save ymm registers after signal handle
671 int ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15
672
673 // Space to save zmm registers after signal handle
674 int zmm_save[16*4]; // Save zmm0, zmm7, zmm8, zmm31
675
676 // Space to save apx registers after signal handle
677 jlong apx_save[2]; // Save r16 and r31
678
679 VM_Features feature_flags() const;
680
681 // Asserts
682 void assert_is_initialized() const {
683 assert(std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
684 }
685
686 // Extractors
687 uint32_t extended_cpu_family() const {
688 uint32_t result = std_cpuid1_eax.bits.family;
689 result += std_cpuid1_eax.bits.ext_family;
690 return result;
691 }
692
693 uint32_t extended_cpu_model() const {
694 uint32_t result = std_cpuid1_eax.bits.model;
695 result |= std_cpuid1_eax.bits.ext_model << 4;
696 return result;
697 }
698
699 uint32_t cpu_stepping() const {
700 uint32_t result = std_cpuid1_eax.bits.stepping;
701 return result;
702 }
703 };
704
705 private:
706 // The actual cpuid info block
707 static CpuidInfo _cpuid_info;
708
709 // Extractors and predicates
710 static uint logical_processor_count() {
711 uint result = threads_per_core();
712 return result;
713 }
714
715 static bool compute_has_intel_jcc_erratum();
716
717 static bool os_supports_avx_vectors();
718 static bool os_supports_apx_egprs();
719 static void get_processor_features();
720
721 public:
722 // Offsets for cpuid asm stub
723 static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
724 static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
725 static ByteSize std_cpuid24_offset() { return byte_offset_of(CpuidInfo, std_cpuid24_eax); }
726 static ByteSize std_cpuid29_offset() { return byte_offset_of(CpuidInfo, std_cpuid29_ebx); }
727 static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
728 static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
729 static ByteSize sefsl1_cpuid7_offset() { return byte_offset_of(CpuidInfo, sefsl1_cpuid7_eax); }
730 static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
731 static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
732 static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
733 static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
734 static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
735 static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
736 static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
737 static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
738 static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
739 static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
740 static ByteSize zmm_save_offset() { return byte_offset_of(CpuidInfo, zmm_save); }
741 static ByteSize apx_save_offset() { return byte_offset_of(CpuidInfo, apx_save); }
742
// Bit pattern used to check ymm registers after signal handling.
static int ymm_test_value() {
  const int pattern = 0xCAFEBABE;
  return pattern;
}
745 static jlong egpr_test_value() { return 0xCAFEBABECAFEBABELL; }
746
747 static void get_cpu_info_wrapper();
748 static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
749 static bool is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }
750 static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
751 static address cpuinfo_cont_addr() { return _cpuinfo_cont_addr; }
752
753 static void set_cpuinfo_segv_addr_apx(address pc) { _cpuinfo_segv_addr_apx = pc; }
754 static bool is_cpuinfo_segv_addr_apx(address pc) { return _cpuinfo_segv_addr_apx == pc; }
755 static void set_cpuinfo_cont_addr_apx(address pc) { _cpuinfo_cont_addr_apx = pc; }
756 static address cpuinfo_cont_addr_apx() { return _cpuinfo_cont_addr_apx; }
757
758 static void clear_apx_test_state();
759
760 static void clean_cpuFeatures() {
761 VM_Version::clear_cpu_features();
762 }
763 static void set_avx_cpuFeatures() {
764 _features.set_feature(CPU_SSE);
765 _features.set_feature(CPU_SSE2);
766 _features.set_feature(CPU_AVX);
767 _features.set_feature(CPU_VZEROUPPER);
768 }
769 static void set_evex_cpuFeatures() {
770 _features.set_feature(CPU_AVX10_1);
771 _features.set_feature(CPU_AVX512F);
772 _features.set_feature(CPU_SSE);
773 _features.set_feature(CPU_SSE2);
774 _features.set_feature(CPU_VZEROUPPER);
775 }
776 static void set_apx_cpuFeatures() {
777 _features.set_feature(CPU_APX_F);
778 }
779 static void set_bmi_cpuFeatures() {
780 _features.set_feature(CPU_BMI1);
781 _features.set_feature(CPU_BMI2);
782 _features.set_feature(CPU_LZCNT);
783 _features.set_feature(CPU_POPCNT);
784 }
785
786 // Initialization
787 static void initialize();
788
789 // Override Abstract_VM_Version implementation
790 static void print_platform_virtualization_info(outputStream*);
791
792 //
793 // Processor family:
794 // 3 - 386
795 // 4 - 486
796 // 5 - Pentium
797 // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
798 // Pentium M, Core Solo, Core Duo, Core2 Duo
799 // family 6 model: 9, 13, 14, 15
800 // 0x0f - Pentium 4, Opteron
801 //
802 // Note: The cpu family should be used to select between
803 // instruction sequences which are valid on all Intel
804 // processors. Use the feature test functions below to
805 // determine whether a particular instruction is supported.
806 //
807 static void assert_is_initialized() { _cpuid_info.assert_is_initialized(); }
808 static uint32_t extended_cpu_family() { return _cpuid_info.extended_cpu_family(); }
809 static uint32_t extended_cpu_model() { return _cpuid_info.extended_cpu_model(); }
810 static uint32_t cpu_stepping() { return _cpuid_info.cpu_stepping(); }
811 static int cpu_family() { return _cpu;}
812 static bool is_P6() { return cpu_family() >= 6; }
813 static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 19; }
814 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
815 static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
816 static bool is_amd_family() { return is_amd() || is_hygon(); }
817 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
818 static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS '
819 static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
820 static bool is_knights_family() { return UseKNLSetting || ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
821
822 static bool supports_processor_topology() {
823 return (_cpuid_info.std_max_function >= 0xB) &&
824 // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level.
825 // Some cpus have max cpuid >= 0xB but do not support processor topology.
826 (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
827 }
828
829 static uint cores_per_cpu();
830 static uint threads_per_core();
831 static uint L1_line_size();
832
833 static uint prefetch_data_size() {
834 return L1_line_size();
835 }
836
837 //
838 // Feature identification which can be affected by VM settings
839 //
840 static bool supports_cmov() { return _features.supports_feature(CPU_CMOV); }
841 static bool supports_fxsr() { return _features.supports_feature(CPU_FXSR); }
842 static bool supports_ht() { return _features.supports_feature(CPU_HT); }
843 static bool supports_mmx() { return _features.supports_feature(CPU_MMX); }
844 static bool supports_sse() { return _features.supports_feature(CPU_SSE); }
845 static bool supports_sse2() { return _features.supports_feature(CPU_SSE2); }
846 static bool supports_sse3() { return _features.supports_feature(CPU_SSE3); }
847 static bool supports_ssse3() { return _features.supports_feature(CPU_SSSE3); }
848 static bool supports_sse4_1() { return _features.supports_feature(CPU_SSE4_1); }
849 static bool supports_sse4_2() { return _features.supports_feature(CPU_SSE4_2); }
850 static bool supports_popcnt() { return _features.supports_feature(CPU_POPCNT); }
851 static bool supports_avx() { return _features.supports_feature(CPU_AVX); }
852 static bool supports_avx2() { return _features.supports_feature(CPU_AVX2); }
853 static bool supports_tsc() { return _features.supports_feature(CPU_TSC); }
854 static bool supports_rdtscp() { return _features.supports_feature(CPU_RDTSCP); }
855 static bool supports_rdpid() { return _features.supports_feature(CPU_RDPID); }
856 static bool supports_aes() { return _features.supports_feature(CPU_AES); }
857 static bool supports_erms() { return _features.supports_feature(CPU_ERMS); }
858 static bool supports_fsrm() { return _features.supports_feature(CPU_FSRM); }
859 static bool supports_clmul() { return _features.supports_feature(CPU_CLMUL); }
860 static bool supports_rtm() { return _features.supports_feature(CPU_RTM); }
861 static bool supports_bmi1() { return _features.supports_feature(CPU_BMI1); }
862 static bool supports_bmi2() { return _features.supports_feature(CPU_BMI2); }
863 static bool supports_adx() { return _features.supports_feature(CPU_ADX); }
864 static bool supports_evex() { return _features.supports_feature(CPU_AVX512F); }
865 static bool supports_avx512dq() { return _features.supports_feature(CPU_AVX512DQ); }
866 static bool supports_avx512ifma() { return _features.supports_feature(CPU_AVX512_IFMA); }
867 static bool supports_avxifma() { return _features.supports_feature(CPU_AVX_IFMA); }
868 static bool supports_avx512pf() { return _features.supports_feature(CPU_AVX512PF); }
869 static bool supports_avx512er() { return _features.supports_feature(CPU_AVX512ER); }
870 static bool supports_avx512cd() { return _features.supports_feature(CPU_AVX512CD); }
871 static bool supports_avx512bw() { return _features.supports_feature(CPU_AVX512BW); }
872 static bool supports_avx512vl() { return _features.supports_feature(CPU_AVX512VL); }
873 static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
874 static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); }
875 static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
876 static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
877 supports_avx512bw() && supports_avx512dq()); }
878 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
879 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
880 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
881 static bool supports_apx_f() { return _features.supports_feature(CPU_APX_F); }
882 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
883 static bool supports_sha() { return _features.supports_feature(CPU_SHA); }
884 static bool supports_fma() { return _features.supports_feature(CPU_FMA) && supports_avx(); }
885 static bool supports_vzeroupper() { return _features.supports_feature(CPU_VZEROUPPER); }
886 static bool supports_avx512_vpopcntdq() { return _features.supports_feature(CPU_AVX512_VPOPCNTDQ); }
887 static bool supports_avx512_vpclmulqdq() { return _features.supports_feature(CPU_AVX512_VPCLMULQDQ); }
888 static bool supports_avx512_vaes() { return _features.supports_feature(CPU_AVX512_VAES); }
889 static bool supports_gfni() { return _features.supports_feature(CPU_GFNI); }
890 static bool supports_avx512_vnni() { return _features.supports_feature(CPU_AVX512_VNNI); }
891 static bool supports_avx512_bitalg() { return _features.supports_feature(CPU_AVX512_BITALG); }
892 static bool supports_avx512_vbmi() { return _features.supports_feature(CPU_AVX512_VBMI); }
893 static bool supports_avx512_vbmi2() { return _features.supports_feature(CPU_AVX512_VBMI2); }
894 static bool supports_avx512_fp16() { return _features.supports_feature(CPU_AVX512_FP16); }
895 static bool supports_hv() { return _features.supports_feature(CPU_HV); }
896 static bool supports_serialize() { return _features.supports_feature(CPU_SERIALIZE); }
897 static bool supports_hybrid() { return _features.supports_feature(CPU_HYBRID); }
898 static bool supports_f16c() { return _features.supports_feature(CPU_F16C); }
899 static bool supports_pku() { return _features.supports_feature(CPU_PKU); }
900 static bool supports_ospke() { return _features.supports_feature(CPU_OSPKE); }
901 static bool supports_cet_ss() { return _features.supports_feature(CPU_CET_SS); }
902 static bool supports_cet_ibt() { return _features.supports_feature(CPU_CET_IBT); }
903 static bool supports_sha512() { return _features.supports_feature(CPU_SHA512); }
904
// Intel® AVX10 introduces a versioned approach for enumeration that is monotonically increasing, inclusive,
906 // and supporting all vector lengths. Feature set supported by an AVX10 vector ISA version is also supported
907 // by all the versions above it.
908 static bool supports_avx10_1() { return _features.supports_feature(CPU_AVX10_1);}
909 static bool supports_avx10_2() { return _features.supports_feature(CPU_AVX10_2);}
910
911 //
912 // Feature identification not affected by VM flags
913 //
914 static bool cpu_supports_evex() { return _cpu_features.supports_feature(CPU_AVX512F); }
915
916 static bool supports_avx512_simd_sort() {
917 if (supports_avx512dq()) {
918 // Disable AVX512 version of SIMD Sort on AMD Zen4 Processors.
919 if (is_amd() && cpu_family() == CPU_FAMILY_AMD_19H) {
920 return false;
921 }
922 return true;
923 }
924 return false;
925 }
926
927 // Intel features
928 static bool is_intel_family_core() { return is_intel() &&
929 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
930
931 static bool is_intel_skylake() { return is_intel_family_core() &&
932 extended_cpu_model() == CPU_MODEL_SKYLAKE; }
933
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
// Only declared when COMPILER2 is defined; the body is not given inline here.
static bool is_default_intel_cascade_lake();
#endif
938
// Declaration only. Presumably identifies Intel Cascade Lake CPUs;
// confirm against the definition.
static bool is_intel_cascade_lake();

// Declaration only. Presumably identifies Intel Darkmont CPUs;
// confirm against the definition.
static bool is_intel_darkmont();

// Declaration only. NOTE(review): the name suggests a threshold tied to
// AVX3 usage; meaning and units of the returned int need confirming.
static int avx3_threshold();

// Declaration only. NOTE(review): the name suggests a check that the Intel
// TSC is synchronized at initialization; confirm against the definition.
static bool is_intel_tsc_synched_at_init();

// Declaration only. Takes a feature set by value and a stringStream by
// reference; presumably writes the names of the given features into ss
// (TODO confirm against the definition).
static void insert_features_names(VM_Version::VM_Features features, stringStream& ss);
948
949 // This checks if the JVM is potentially affected by an erratum on Intel CPUs (SKX102)
950 // that causes unpredictable behaviour when jcc crosses 64 byte boundaries. Its microcode
951 // mitigation causes regressions when jumps or fused conditional branches cross or end at
952 // 32 byte boundaries.
953 static bool has_intel_jcc_erratum() { return _has_intel_jcc_erratum; }
954
955 // AMD features
956 static bool supports_3dnow_prefetch() { return _features.supports_feature(CPU_3DNOW_PREFETCH); }
957 static bool supports_lzcnt() { return _features.supports_feature(CPU_LZCNT); }
958 static bool supports_sse4a() { return _features.supports_feature(CPU_SSE4A); }
959
960 static bool is_amd_Barcelona() { return is_amd() &&
961 extended_cpu_family() == CPU_FAMILY_AMD_11H; }
962
963 // Intel and AMD newer cores support fast timestamps well
964 static bool supports_tscinv_bit() {
965 return _features.supports_feature(CPU_TSCINV_BIT);
966 }
967 static bool supports_tscinv() {
968 return _features.supports_feature(CPU_TSCINV);
969 }
970
971 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom).
972 static bool has_fast_idiv() { return is_intel() && is_intel_server_family() &&
973 supports_sse3() && _model != 0x1C; }
974
// Unconditionally reports support: always returns true.
static bool supports_compare_and_exchange() {
  return true;
}
976
// Declaration only. NOTE(review): presumably returns the prefetch distance used
// for allocation prefetching, chosen according to use_watermark_prefetch; the
// units and the selection logic need confirming against the definition.
static int allocate_prefetch_distance(bool use_watermark_prefetch);
978
979 // SSE2 and later processors implement a 'pause' instruction
980 // that can be used for efficient implementation of
981 // the intrinsic for java.lang.Thread.onSpinWait()
982 static bool supports_on_spin_wait() { return supports_sse2(); }
983
// x86_64 supports fast class initialization checks, so this is always true.
static bool supports_fast_class_init_checks() { return true; }
988
// Compile-time capability constants; each of these is unconditionally true.

// x86_64 supports secondary supers table
static constexpr bool supports_secondary_supers_table() { return true; }

static constexpr bool supports_stack_watermark_barrier() { return true; }

static constexpr bool supports_recursive_lightweight_locking() { return true; }
1001
1002 // For AVX CPUs only. f16c support is disabled if UseAVX == 0.
1003 static bool supports_float16() {
1004 return supports_f16c() || supports_avx512vl() || supports_avx512_fp16();
1005 }
1006
// Check intrinsic support
// Declaration only; takes the vmIntrinsicID to check. The criteria applied are
// in the definition, which is not given inline here.
static bool is_intrinsic_supported(vmIntrinsicID id);
1009
// There are several instructions that force a cache line sync to memory,
// which we can use to ensure mapped non-volatile memory is up to date with
// pending in-cache changes.
//
// 64-bit CPUs always support clflush, which writes back and evicts;
// on 32-bit CPUs support is recorded via a feature flag.
//
// clflushopt is optional and acts like clflush except that it does
// not synchronize with other memory ops. It needs a preceding
// and trailing StoreStore fence.
//
// clwb is an optional Intel-specific instruction which
// writes back without evicting the line. It also does not
// synchronize with other memory ops, so it needs preceding
// and trailing StoreStore fences.

static bool supports_clflush(); // Can't inline due to header file conflict
1027
1028 // Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit
1029 static bool supports_clflushopt() { return (_features.supports_feature(CPU_FLUSHOPT)); }
1030 static bool supports_clwb() { return (_features.supports_feature(CPU_CLWB)); }
1031
1032 // Old CPUs perform lea on AGU which causes additional latency transferring the
1033 // value from/to ALU for other operations
1034 static bool supports_fast_2op_lea() {
1035 return (is_intel() && supports_avx()) || // Sandy Bridge and above
1036 (is_amd() && supports_avx()); // Jaguar and Bulldozer and above
1037 }
1038
1039 // Pre Icelake Intels suffer inefficiency regarding 3-operand lea, which contains
1040 // all of base register, index register and displacement immediate, with 3 latency.
1041 // Note that when the address contains no displacement but the base register is
1042 // rbp or r13, the machine code must contain a zero displacement immediate,
1043 // effectively transform a 2-operand lea into a 3-operand lea. This can be
1044 // replaced by add-add or lea-add
1045 static bool supports_fast_3op_lea() {
1046 return supports_fast_2op_lea() &&
1047 ((is_intel() && supports_clwb() && !is_intel_skylake()) || // Icelake and above
1048 is_amd());
1049 }
1050
#ifdef __APPLE__
// Is the CPU running emulated (for example, macOS Rosetta running x86_64 code on M1 ARM (aarch64))?
// Only declared when __APPLE__ is defined.
static bool is_cpu_emulated();
#endif
1055
// support functions for virtualization detection
private:
static void check_virtualizations();

// String-returning helpers for CPU family, model and brand. Declarations only;
// NOTE(review): lifetime/ownership of the returned strings is not visible here.
static const char* cpu_family_description(void);
static const char* cpu_model_description(void);
static const char* cpu_brand(void);
static const char* cpu_brand_string(void);

// Each takes a caller-supplied buffer and its length.
// NOTE(review): the meaning of the int result needs confirming against the definitions.
static int cpu_type_description(char* const buf, size_t buf_len);
static int cpu_detailed_description(char* const buf, size_t buf_len);
static int cpu_extended_brand_string(char* const buf, size_t buf_len);

// Declarations only; presumably identify EM64T-capable and NetBurst CPUs
// respectively -- confirm against the definitions.
static bool cpu_is_em64t(void);
static bool is_netburst(void);

// Returns bytes written excluding terminating null byte.
static size_t cpu_write_support_string(char* const buf, size_t buf_len);
static void resolve_cpu_information_details(void);
// NOTE(review): presumably parses a maximum frequency out of the CPU brand
// string; units of the int64_t result need confirming.
static int64_t max_qualified_cpu_freq_from_brand_string(void);
1076
1077 public:
1078 // Offsets for cpuid asm stub brand string
1079 static ByteSize proc_name_0_offset() { return byte_offset_of(CpuidInfo, proc_name_0); }
1080 static ByteSize proc_name_1_offset() { return byte_offset_of(CpuidInfo, proc_name_1); }
1081 static ByteSize proc_name_2_offset() { return byte_offset_of(CpuidInfo, proc_name_2); }
1082 static ByteSize proc_name_3_offset() { return byte_offset_of(CpuidInfo, proc_name_3); }
1083 static ByteSize proc_name_4_offset() { return byte_offset_of(CpuidInfo, proc_name_4); }
1084 static ByteSize proc_name_5_offset() { return byte_offset_of(CpuidInfo, proc_name_5); }
1085 static ByteSize proc_name_6_offset() { return byte_offset_of(CpuidInfo, proc_name_6); }
1086 static ByteSize proc_name_7_offset() { return byte_offset_of(CpuidInfo, proc_name_7); }
1087 static ByteSize proc_name_8_offset() { return byte_offset_of(CpuidInfo, proc_name_8); }
1088 static ByteSize proc_name_9_offset() { return byte_offset_of(CpuidInfo, proc_name_9); }
1089 static ByteSize proc_name_10_offset() { return byte_offset_of(CpuidInfo, proc_name_10); }
1090 static ByteSize proc_name_11_offset() { return byte_offset_of(CpuidInfo, proc_name_11); }
1091
// Declaration only. NOTE(review): presumably the maximum qualified CPU
// frequency; units of the int64_t result need confirming against the definition.
static int64_t maximum_qualified_cpu_frequency(void);

// Declaration only. NOTE(review): the name suggests an extended check for
// invariant-TSC support; confirm against the definition.
static bool supports_tscinv_ext(void);

// Declaration only; presumably populates the CPU description data exposed by
// this class -- confirm against the definition.
static void initialize_cpu_information(void);
1097 };
1098
1099 #endif // CPU_X86_VM_VERSION_X86_HPP