1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #ifndef CPU_X86_VM_VERSION_X86_HPP
26 #define CPU_X86_VM_VERSION_X86_HPP
27
28 #include "runtime/abstract_vm_version.hpp"
29 #include "utilities/debug.hpp"
30 #include "utilities/macros.hpp"
31 #include "utilities/sizes.hpp"
32
33 class stringStream;
34
35 class VM_Version : public Abstract_VM_Version {
36 friend class VMStructs;
37 friend class JVMCIVMStructs;
38
39 public:
40 // cpuid result register layouts. These are all unions of a uint32_t
41 // (in case anyone wants access to the register as a whole) and a bitfield.
42
// EAX layout for cpuid function 1 (processor signature).
// Bit ranges below count in declaration order from bit 0.
union StdCpuid1Eax {
  uint32_t value;                 // whole register
  struct {
    uint32_t stepping   : 4;      // bits  3:0
    uint32_t model      : 4;      // bits  7:4
    uint32_t family     : 4;      // bits 11:8
    uint32_t proc_type  : 2;      // bits 13:12
    uint32_t            : 2;      // bits 15:14 (unnamed)
    uint32_t ext_model  : 4;      // bits 19:16
    uint32_t ext_family : 8;      // bits 27:20
    uint32_t            : 4;      // bits 31:28 (unnamed)
  } bits;
};
56
// EBX layout for cpuid function 1 (example, unused).
// The register is split into four consecutive bytes.
union StdCpuid1Ebx {
  uint32_t value;                     // whole register
  struct {
    uint32_t brand_id        : 8;     // bits  7:0
    uint32_t clflush_size    : 8;     // bits 15:8
    uint32_t threads_per_cpu : 8;     // bits 23:16
    uint32_t apic_id         : 8;     // bits 31:24
  } bits;
};
66
// ECX feature bits for cpuid function 1.
// Bit numbers below count in declaration order from bit 0; unnamed
// fields only provide padding between the flags this class looks at.
union StdCpuid1Ecx {
  uint32_t value;               // whole register
  struct {
    uint32_t sse3      : 1;     // bit  0
    uint32_t clmul     : 1;     // bit  1
    uint32_t           : 1;     // bit  2
    uint32_t monitor   : 1;     // bit  3
    uint32_t           : 1;     // bit  4
    uint32_t vmx       : 1;     // bit  5
    uint32_t           : 1;     // bit  6
    uint32_t est       : 1;     // bit  7
    uint32_t           : 1;     // bit  8
    uint32_t ssse3     : 1;     // bit  9
    uint32_t cid       : 1;     // bit 10
    uint32_t           : 1;     // bit 11
    uint32_t fma       : 1;     // bit 12
    uint32_t cmpxchg16 : 1;     // bit 13
    uint32_t           : 4;     // bits 17:14
    uint32_t dca       : 1;     // bit 18
    uint32_t sse4_1    : 1;     // bit 19
    uint32_t sse4_2    : 1;     // bit 20
    uint32_t           : 2;     // bits 22:21
    uint32_t popcnt    : 1;     // bit 23
    uint32_t           : 1;     // bit 24
    uint32_t aes       : 1;     // bit 25
    uint32_t           : 1;     // bit 26
    uint32_t osxsave   : 1;     // bit 27
    uint32_t avx       : 1;     // bit 28
    uint32_t f16c      : 1;     // bit 29
    uint32_t           : 1;     // bit 30
    uint32_t hv        : 1;     // bit 31
  } bits;
};
100
// EDX feature bits for cpuid function 1.
// Bit numbers below count in declaration order from bit 0.
union StdCpuid1Edx {
  uint32_t value;              // whole register
  struct {
    uint32_t          : 4;     // bits  3:0
    uint32_t tsc      : 1;     // bit  4
    uint32_t          : 3;     // bits  7:5
    uint32_t cmpxchg8 : 1;     // bit  8
    uint32_t          : 6;     // bits 14:9
    uint32_t cmov     : 1;     // bit 15
    uint32_t          : 3;     // bits 18:16
    uint32_t clflush  : 1;     // bit 19
    uint32_t          : 3;     // bits 22:20
    uint32_t mmx      : 1;     // bit 23
    uint32_t fxsr     : 1;     // bit 24
    uint32_t sse      : 1;     // bit 25
    uint32_t sse2     : 1;     // bit 26
    uint32_t          : 1;     // bit 27
    uint32_t ht       : 1;     // bit 28
    uint32_t          : 3;     // bits 31:29
  } bits;
};
122
// EAX layout for cpuid function 4 (deterministic cache parameters).
union DcpCpuid4Eax {
  uint32_t value;                   // whole register
  struct {
    uint32_t cache_type    : 5;     // bits  4:0
    uint32_t               : 21;    // bits 25:5 (unnamed)
    uint32_t cores_per_cpu : 6;     // bits 31:26
  } bits;
};
131
// EBX layout for cpuid function 4 (deterministic cache parameters).
union DcpCpuid4Ebx {
  uint32_t value;                   // whole register
  struct {
    uint32_t L1_line_size  : 12;    // bits 11:0
    uint32_t partitions    : 10;    // bits 21:12
    uint32_t associativity : 10;    // bits 31:22
  } bits;
};
140
// EBX layout for cpuid function 0xB (processor topology).
union TplCpuidBEbx {
  uint32_t value;                  // whole register
  struct {
    uint32_t logical_cpus : 16;    // bits 15:0
    uint32_t              : 16;    // bits 31:16 (unnamed)
  } bits;
};
148
// ECX feature bits for cpuid function 0x80000001.
// Bit numbers below count in declaration order from bit 0.
union ExtCpuid1Ecx {
  uint32_t value;                 // whole register
  struct {
    uint32_t LahfSahf    : 1;     // bit 0
    uint32_t CmpLegacy   : 1;     // bit 1
    uint32_t             : 3;     // bits 4:2
    uint32_t lzcnt       : 1;     // bit 5
    uint32_t sse4a       : 1;     // bit 6
    uint32_t misalignsse : 1;     // bit 7
    uint32_t prefetchw   : 1;     // bit 8
    uint32_t             : 23;    // bits 31:9
  } bits;
};
162
// EDX feature bits for cpuid function 0x80000001.
// Bit numbers below count in declaration order from bit 0.
union ExtCpuid1Edx {
  uint32_t value;               // whole register
  struct {
    uint32_t           : 22;    // bits 21:0
    uint32_t mmx_amd   : 1;     // bit 22
    uint32_t mmx       : 1;     // bit 23
    uint32_t fxsr      : 1;     // bit 24
    uint32_t fxsr_opt  : 1;     // bit 25
    uint32_t pdpe1gb   : 1;     // bit 26
    uint32_t rdtscp    : 1;     // bit 27
    uint32_t           : 1;     // bit 28
    uint32_t long_mode : 1;     // bit 29
    uint32_t tdnow2    : 1;     // bit 30
    uint32_t tdnow     : 1;     // bit 31
  } bits;
};
179
// Register layout shared by ECX and EDX of cpuid function 0x80000005
// (L1 cache description); the register is split into four bytes.
union ExtCpuid5Ex {
  uint32_t value;                  // whole register
  struct {
    uint32_t L1_line_size : 8;     // bits  7:0
    uint32_t L1_tag_lines : 8;     // bits 15:8
    uint32_t L1_assoc     : 8;     // bits 23:16
    uint32_t L1_size      : 8;     // bits 31:24
  } bits;
};
189
// EDX layout for cpuid function 0x80000007; only the TSC invariance
// flag is named.
union ExtCpuid7Edx {
  uint32_t value;                    // whole register
  struct {
    uint32_t                : 8;     // bits  7:0 (unnamed)
    uint32_t tsc_invariance : 1;     // bit  8
    uint32_t                : 23;    // bits 31:9 (unnamed)
  } bits;
};
198
// ECX layout for cpuid function 0x80000008.
union ExtCpuid8Ecx {
  uint32_t value;                     // whole register
  struct {
    uint32_t threads_per_cpu : 8;     // bits  7:0
    uint32_t                 : 24;    // bits 31:8 (unnamed)
  } bits;
};
206
// EAX for cpuid function 7, sub-leaf 0. No individual bits are broken
// out; only the raw register value is kept.
union SefCpuid7Eax {
  uint32_t value;   // whole register
};
210
// EBX feature bits for cpuid function 7, sub-leaf 0.
// Bit numbers below count in declaration order from bit 0.
union SefCpuid7Ebx {
  uint32_t value;                // whole register
  struct {
    uint32_t fsgsbase   : 1;     // bit  0
    uint32_t            : 2;     // bits  2:1
    uint32_t bmi1       : 1;     // bit  3
    uint32_t            : 1;     // bit  4
    uint32_t avx2       : 1;     // bit  5
    uint32_t            : 2;     // bits  7:6
    uint32_t bmi2       : 1;     // bit  8
    uint32_t erms       : 1;     // bit  9
    uint32_t            : 1;     // bit 10
    uint32_t rtm        : 1;     // bit 11
    uint32_t            : 4;     // bits 15:12
    uint32_t avx512f    : 1;     // bit 16
    uint32_t avx512dq   : 1;     // bit 17
    uint32_t            : 1;     // bit 18
    uint32_t adx        : 1;     // bit 19
    uint32_t            : 1;     // bit 20
    uint32_t avx512ifma : 1;     // bit 21
    uint32_t            : 1;     // bit 22
    uint32_t clflushopt : 1;     // bit 23
    uint32_t clwb       : 1;     // bit 24
    uint32_t            : 1;     // bit 25
    uint32_t avx512pf   : 1;     // bit 26
    uint32_t avx512er   : 1;     // bit 27
    uint32_t avx512cd   : 1;     // bit 28
    uint32_t sha        : 1;     // bit 29
    uint32_t avx512bw   : 1;     // bit 30
    uint32_t avx512vl   : 1;     // bit 31
  } bits;
};
243
// ECX feature bits for cpuid function 7, sub-leaf 0.
// Bit numbers below count in declaration order from bit 0.
union SefCpuid7Ecx {
  uint32_t value;                       // whole register
  struct {
    uint32_t prefetchwt1       : 1;     // bit  0
    uint32_t avx512_vbmi       : 1;     // bit  1
    uint32_t umip              : 1;     // bit  2
    uint32_t pku               : 1;     // bit  3
    uint32_t ospke             : 1;     // bit  4
    uint32_t                   : 1;     // bit  5
    uint32_t avx512_vbmi2      : 1;     // bit  6
    uint32_t cet_ss            : 1;     // bit  7
    uint32_t gfni              : 1;     // bit  8
    uint32_t vaes              : 1;     // bit  9
    uint32_t avx512_vpclmulqdq : 1;     // bit 10
    uint32_t avx512_vnni       : 1;     // bit 11
    uint32_t avx512_bitalg     : 1;     // bit 12
    uint32_t                   : 1;     // bit 13
    uint32_t avx512_vpopcntdq  : 1;     // bit 14
    uint32_t                   : 1;     // bit 15
    uint32_t                   : 1;     // bit 16
    uint32_t mawau             : 5;     // bits 21:17
    uint32_t rdpid             : 1;     // bit 22
    uint32_t                   : 9;     // bits 31:23
  } bits;
};
269
// EDX feature bits for cpuid function 7, sub-leaf 0.
// Bit numbers below count in declaration order from bit 0.
union SefCpuid7Edx {
  uint32_t value;                        // whole register
  struct {
    uint32_t                    : 2;     // bits  1:0
    uint32_t avx512_4vnniw      : 1;     // bit  2
    uint32_t avx512_4fmaps      : 1;     // bit  3
    uint32_t fast_short_rep_mov : 1;     // bit  4
    uint32_t                    : 9;     // bits 13:5
    uint32_t serialize          : 1;     // bit 14
    uint32_t hybrid             : 1;     // bit 15
    uint32_t                    : 4;     // bits 19:16
    uint32_t cet_ibt            : 1;     // bit 20
    uint32_t                    : 2;     // bits 22:21
    uint32_t avx512_fp16        : 1;     // bit 23
    uint32_t                    : 8;     // bits 31:24
  } bits;
};
287
// EAX feature bits for cpuid function 7, sub-leaf 1.
union SefCpuid7SubLeaf1Eax {
  uint32_t value;              // whole register
  struct {
    uint32_t sha512   : 1;     // bit  0
    uint32_t          : 22;    // bits 22:1 (unnamed)
    uint32_t avx_ifma : 1;     // bit 23
    uint32_t          : 8;     // bits 31:24 (unnamed)
  } bits;
};
297
// EDX feature bits for cpuid function 7, sub-leaf 1.
union SefCpuid7SubLeaf1Edx {
  uint32_t value;           // whole register
  struct {
    uint32_t       : 19;    // bits 18:0 (unnamed)
    uint32_t avx10 : 1;     // bit 19
    uint32_t       : 1;     // bit 20 (unnamed)
    uint32_t apx_f : 1;     // bit 21
    uint32_t       : 10;    // bits 31:22 (unnamed)
  } bits;
};
308
// Register layout for cpuid function 0x29, sub-leaf 0 (APX leaf).
// Only bit 0 is named.
union StdCpuidEax29Ecx0 {
  uint32_t value;                    // whole register
  struct {
    uint32_t apx_nci_ndd_nf : 1;     // bit  0
    uint32_t                : 31;    // bits 31:1 (unnamed)
  } bits;
};
316
// EAX layout for cpuid function 24, main leaf (converged vector ISA).
// The named field covers the low 31 bits of the register.
union StdCpuid24MainLeafEax {
  uint32_t value;                    // whole register
  struct {
    uint32_t sub_leaves_cnt : 31;    // bits 30:0
  } bits;
};
323
// EBX layout for cpuid function 24, main leaf (converged vector ISA).
union StdCpuid24MainLeafEbx {
  uint32_t value;                                 // whole register
  struct {
    uint32_t avx10_converged_isa_version : 8;     // bits  7:0
    uint32_t                             : 8;     // bits 15:8 (unnamed)
    uint32_t                             : 2;     // bits 17:16 (unnamed)
    uint32_t avx10_vlen_512              : 1;     // bit 18
    uint32_t                             : 13;    // bits 31:19 (unnamed)
  } bits;
};
334
// EBX layout for cpuid function 0x8000001E.
union ExtCpuid1EEbx {
  uint32_t value;                      // whole register
  struct {
    uint32_t                  : 8;     // bits  7:0 (unnamed)
    uint32_t threads_per_core : 8;     // bits 15:8
    uint32_t                  : 16;    // bits 31:16 (unnamed)
  } bits;
};
343
// Low 32 bits of extended control register XCR0.
// Bit numbers below count in declaration order from bit 0.
union XemXcr0Eax {
  uint32_t value;             // whole register
  struct {
    uint32_t x87     : 1;     // bit  0
    uint32_t sse     : 1;     // bit  1
    uint32_t ymm     : 1;     // bit  2
    uint32_t bndregs : 1;     // bit  3
    uint32_t bndcsr  : 1;     // bit  4
    uint32_t opmask  : 1;     // bit  5
    uint32_t zmm512  : 1;     // bit  6
    uint32_t zmm32   : 1;     // bit  7
    uint32_t         : 11;    // bits 18:8 (unnamed)
    uint32_t apx_f   : 1;     // bit 19
    uint32_t         : 12;    // bits 31:20 (unnamed)
  } bits;
};
360
361 protected:
// Cached processor identification. _cpu is the value returned by
// cpu_family(); _model and _stepping are presumably the matching model and
// stepping numbers (they are assigned outside this header -- confirm there).
static int _cpu;
static int _model;
static int _stepping;

// Value returned by has_intel_jcc_erratum().
static bool _has_intel_jcc_erratum;

static address _cpuinfo_segv_addr;     // address of instruction which causes SEGV
static address _cpuinfo_cont_addr;     // address of instruction after the one which causes SEGV
static address _cpuinfo_segv_addr_apx; // address of instruction which causes APX specific SEGV
static address _cpuinfo_cont_addr_apx; // address of instruction after the one which causes APX specific SEGV
372
373 /*
374 * Update following files when declaring new flags:
375 * test/lib-test/jdk/test/whitebox/CPUInfoTest.java
376 * src/jdk.internal.vm.ci/share/classes/jdk/vm/ci/amd64/AMD64.java
377 */
// CPU feature identifiers. Every decl(id, name, bit) row of
// CPU_FEATURE_FLAGS becomes an enumerator CPU_<id> with value <bit>.
// MAX_CPU_FEATURES follows the last row, so it is one more than the highest
// bit number in the table. CPU_FEATURE_FLAGS itself is left defined after
// the enum (only the helper DECLARE_CPU_FEATURE_FLAG is undefined).
enum Feature_Flag {
#define CPU_FEATURE_FLAGS(decl) \
    decl(CX8,               cx8,               0)  /* next bits are from cpuid 1 (EDX) */ \
    decl(CMOV,              cmov,              1)  \
    decl(FXSR,              fxsr,              2)  \
    decl(HT,                ht,                3)  \
                                                   \
    decl(MMX,               mmx,               4)  \
    decl(3DNOW_PREFETCH,    3dnowpref,         5)  /* Processor supports 3dnow prefetch and prefetchw instructions */ \
                                                   /* may not necessarily support other 3dnow instructions */ \
    decl(SSE,               sse,               6)  \
    decl(SSE2,              sse2,              7)  \
                                                   \
    decl(SSE3,              sse3,              8 ) /* SSE3 comes from cpuid 1 (ECX) */ \
    decl(SSSE3,             ssse3,             9 ) \
    decl(SSE4A,             sse4a,             10) \
    decl(SSE4_1,            sse4.1,            11) \
                                                   \
    decl(SSE4_2,            sse4.2,            12) \
    decl(POPCNT,            popcnt,            13) \
    decl(LZCNT,             lzcnt,             14) \
    decl(TSC,               tsc,               15) \
                                                   \
    decl(TSCINV_BIT,        tscinvbit,         16) \
    decl(TSCINV,            tscinv,            17) \
    decl(AVX,               avx,               18) \
    decl(AVX2,              avx2,              19) \
                                                   \
    decl(AES,               aes,               20) \
    decl(ERMS,              erms,              21) /* enhanced 'rep movsb/stosb' instructions */ \
    decl(CLMUL,             clmul,             22) /* carryless multiply for CRC */ \
    decl(BMI1,              bmi1,              23) \
                                                   \
    decl(BMI2,              bmi2,              24) \
    decl(RTM,               rtm,               25) /* Restricted Transactional Memory instructions */ \
    decl(ADX,               adx,               26) \
    decl(AVX512F,           avx512f,           27) /* AVX 512bit foundation instructions */ \
                                                   \
    decl(AVX512DQ,          avx512dq,          28) \
    decl(AVX512PF,          avx512pf,          29) \
    decl(AVX512ER,          avx512er,          30) \
    decl(AVX512CD,          avx512cd,          31) \
                                                   \
    decl(AVX512BW,          avx512bw,          32) /* Byte and word vector instructions */ \
    decl(AVX512VL,          avx512vl,          33) /* EVEX instructions with smaller vector length */ \
    decl(SHA,               sha,               34) /* SHA instructions */ \
    decl(FMA,               fma,               35) /* FMA instructions */ \
                                                   \
    decl(VZEROUPPER,        vzeroupper,        36) /* Vzeroupper instruction */ \
    decl(AVX512_VPOPCNTDQ,  avx512_vpopcntdq,  37) /* Vector popcount */ \
    decl(AVX512_VPCLMULQDQ, avx512_vpclmulqdq, 38) /* Vector carryless multiplication */ \
    decl(AVX512_VAES,       avx512_vaes,       39) /* Vector AES instruction */ \
                                                   \
    decl(AVX512_VNNI,       avx512_vnni,       40) /* Vector Neural Network Instructions */ \
    decl(FLUSH,             clflush,           41) /* clflush instruction */ \
    decl(FLUSHOPT,          clflushopt,        42) /* clflushopt instruction */ \
    decl(CLWB,              clwb,              43) /* clwb instruction */ \
                                                   \
    decl(AVX512_VBMI2,      avx512_vbmi2,      44) /* VBMI2 shift left double instructions */ \
    decl(AVX512_VBMI,       avx512_vbmi,       45) /* Vector BMI instructions */ \
    decl(HV,                hv,                46) /* Hypervisor instructions */ \
    decl(SERIALIZE,         serialize,         47) /* CPU SERIALIZE */ \
    decl(RDTSCP,            rdtscp,            48) /* RDTSCP instruction */ \
    decl(RDPID,             rdpid,             49) /* RDPID instruction */ \
    decl(FSRM,              fsrm,              50) /* Fast Short REP MOV */ \
    decl(GFNI,              gfni,              51) /* Vector GFNI instructions */ \
    decl(AVX512_BITALG,     avx512_bitalg,     52) /* Vector sub-word popcount and bit gather instructions */\
    decl(F16C,              f16c,              53) /* Half-precision and single precision FP conversion instructions */ \
    decl(PKU,               pku,               54) /* Protection keys for user-mode pages */ \
    decl(OSPKE,             ospke,             55) /* OS enables protection keys */ \
    decl(CET_IBT,           cet_ibt,           56) /* Control Flow Enforcement - Indirect Branch Tracking */ \
    decl(CET_SS,            cet_ss,            57) /* Control Flow Enforcement - Shadow Stack */ \
    decl(AVX512_IFMA,       avx512_ifma,       58) /* Integer Vector FMA instructions */ \
    decl(AVX_IFMA,          avx_ifma,          59) /* 256-bit VEX-coded variant of AVX512-IFMA */ \
    decl(APX_F,             apx_f,             60) /* Intel Advanced Performance Extensions */ \
    decl(SHA512,            sha512,            61) /* SHA512 instructions */ \
    decl(AVX512_FP16,       avx512_fp16,       62) /* AVX512 FP16 ISA support */ \
    decl(AVX10_1,           avx10_1,           63) /* AVX10 512 bit vector ISA Version 1 support */ \
    decl(AVX10_2,           avx10_2,           64) /* AVX10 512 bit vector ISA Version 2 support */ \
    decl(HYBRID,            hybrid,            65) /* Hybrid architecture */

// Expand each table row into "CPU_<id> = (bit),".
#define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (bit),
    CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
#undef DECLARE_CPU_FEATURE_FLAG
    MAX_CPU_FEATURES   // one past the last feature bit declared above
};
464
465 class VM_Features {
466 friend class VMStructs;
467 friend class JVMCIVMStructs;
468
469 private:
470 uint64_t _features_bitmap[(MAX_CPU_FEATURES / BitsPerLong) + 1];
471
472 STATIC_ASSERT(sizeof(_features_bitmap) * BitsPerByte >= MAX_CPU_FEATURES);
473
474 // Number of 8-byte elements in _bitmap.
475 constexpr static int features_bitmap_element_count() {
476 return sizeof(_features_bitmap) / sizeof(uint64_t);
477 }
478
479 constexpr static int features_bitmap_element_shift_count() {
480 return LogBitsPerLong;
481 }
482
483 constexpr static uint64_t features_bitmap_element_mask() {
484 return (1ULL << features_bitmap_element_shift_count()) - 1;
485 }
486
487 static int index(Feature_Flag feature) {
488 int idx = feature >> features_bitmap_element_shift_count();
489 assert(idx < features_bitmap_element_count(), "Features array index out of bounds");
490 return idx;
491 }
492
493 static uint64_t bit_mask(Feature_Flag feature) {
494 return (1ULL << (feature & features_bitmap_element_mask()));
495 }
496
497 static int _features_bitmap_size; // for JVMCI purposes
498
499 public:
500 VM_Features() {
501 for (int i = 0; i < features_bitmap_element_count(); i++) {
502 _features_bitmap[i] = 0;
503 }
504 }
505
506 void set_feature(Feature_Flag feature) {
507 int idx = index(feature);
508 _features_bitmap[idx] |= bit_mask(feature);
509 }
510
511 void clear_feature(VM_Version::Feature_Flag feature) {
512 int idx = index(feature);
513 _features_bitmap[idx] &= ~bit_mask(feature);
514 }
515
516 bool supports_feature(VM_Version::Feature_Flag feature) {
517 int idx = index(feature);
518 return (_features_bitmap[idx] & bit_mask(feature)) != 0;
519 }
520
521 bool supports_features(VM_Features* features_to_test) {
522 for (int i = 0; i < features_bitmap_element_count(); i++) {
523 if ((_features_bitmap[i] & features_to_test->_features_bitmap[i]) != features_to_test->_features_bitmap[i]) {
524 return false;
525 }
526 }
527 return true;
528 }
529 };
530
531 // CPU feature flags vector, can be affected by VM settings.
// CPU feature flags vector, can be affected by VM settings.
static VM_Features _features;

// Original CPU feature flags vector, not affected by VM settings.
static VM_Features _cpu_features;

// Table of feature names; defined outside this header.
// NOTE(review): presumably indexed by Feature_Flag using the 'name' column
// of CPU_FEATURE_FLAGS -- confirm against the definition.
static const char* _features_names[];
538
539 static void clear_cpu_features() {
540 _features = VM_Features();
541 _cpu_features = VM_Features();
542 }
543
// Named CPU family and model numbers used by the vendor-specific checks.
// Family and model constants share this one enum, so several enumerators
// intentionally have the same value (e.g. the ZX and Intel family 6).
enum Extended_Family {
  // AMD families
  CPU_FAMILY_AMD_11H       = 0x11,
  CPU_FAMILY_AMD_17H       = 0x17,  // Zen1 & Zen2
  CPU_FAMILY_AMD_19H       = 0x19,  // Zen3 & Zen4

  // ZX families
  CPU_FAMILY_ZX_CORE_F6    = 0x06,
  CPU_FAMILY_ZX_CORE_F7    = 0x07,

  // Intel family
  CPU_FAMILY_INTEL_CORE    = 0x06,

  // Intel models
  CPU_MODEL_NEHALEM        = 0x1e,
  CPU_MODEL_NEHALEM_EP     = 0x1a,
  CPU_MODEL_NEHALEM_EX     = 0x2e,
  CPU_MODEL_WESTMERE       = 0x25,
  CPU_MODEL_WESTMERE_EP    = 0x2c,
  CPU_MODEL_WESTMERE_EX    = 0x2f,
  CPU_MODEL_SANDYBRIDGE    = 0x2a,
  CPU_MODEL_SANDYBRIDGE_EP = 0x2d,
  CPU_MODEL_IVYBRIDGE_EP   = 0x3a,
  CPU_MODEL_HASWELL_E3     = 0x3c,
  CPU_MODEL_HASWELL_E7     = 0x3f,
  CPU_MODEL_BROADWELL      = 0x3d,
  CPU_MODEL_SKYLAKE        = 0x55
};
568
569 // cpuid information block. All info derived from executing cpuid with
570 // various function numbers is stored here. Intel and AMD info is
571 // merged in this block: accessor methods disentangle it.
572 //
573 // The info block is laid out in subblocks of 4 dwords corresponding to
574 // eax, ebx, ecx and edx, whether or not they contain anything useful.
// Raw cpuid results plus register save areas. The *_offset() accessors of
// the enclosing class take byte offsets of these fields, so the order and
// size of the members is part of the contract with the cpuid asm stub: do
// not reorder or resize fields without updating that stub.
class CpuidInfo {
 public:
  // cpuid function 0
  uint32_t std_max_function;
  uint32_t std_vendor_name_0;
  uint32_t std_vendor_name_1;
  uint32_t std_vendor_name_2;

  // cpuid function 1
  StdCpuid1Eax std_cpuid1_eax;
  StdCpuid1Ebx std_cpuid1_ebx;
  StdCpuid1Ecx std_cpuid1_ecx;
  StdCpuid1Edx std_cpuid1_edx;

  // cpuid function 4 (deterministic cache parameters)
  DcpCpuid4Eax dcp_cpuid4_eax;
  DcpCpuid4Ebx dcp_cpuid4_ebx;
  uint32_t     dcp_cpuid4_ecx; // unused currently
  uint32_t     dcp_cpuid4_edx; // unused currently

  // cpuid function 7 (structured extended features enumeration leaf)
  // eax = 7, ecx = 0
  SefCpuid7Eax sef_cpuid7_eax;
  SefCpuid7Ebx sef_cpuid7_ebx;
  SefCpuid7Ecx sef_cpuid7_ecx;
  SefCpuid7Edx sef_cpuid7_edx;

  // cpuid function 7 (structured extended features enumeration sub-leaf 1)
  // eax = 7, ecx = 1
  // Only EAX and EDX of this sub-leaf are stored.
  SefCpuid7SubLeaf1Eax sefsl1_cpuid7_eax;
  SefCpuid7SubLeaf1Edx sefsl1_cpuid7_edx;

  // cpuid function 24 converged vector ISA main leaf
  // eax = 24, ecx = 0
  // Only EAX and EBX of this leaf are stored.
  StdCpuid24MainLeafEax std_cpuid24_eax;
  StdCpuid24MainLeafEbx std_cpuid24_ebx;

  // cpuid function 0x29 APX Advanced Performance Extensions Leaf
  // eax = 0x29, ecx = 0
  // NOTE(review): the field name says EBX while the type name only encodes
  // the leaf/sub-leaf -- confirm against the stub which register is stored.
  StdCpuidEax29Ecx0 std_cpuid29_ebx;

  // cpuid function 0xB (processor topology)
  // ecx = 0
  uint32_t     tpl_cpuidB0_eax;
  TplCpuidBEbx tpl_cpuidB0_ebx;
  uint32_t     tpl_cpuidB0_ecx; // unused currently
  uint32_t     tpl_cpuidB0_edx; // unused currently

  // ecx = 1
  uint32_t     tpl_cpuidB1_eax;
  TplCpuidBEbx tpl_cpuidB1_ebx;
  uint32_t     tpl_cpuidB1_ecx; // unused currently
  uint32_t     tpl_cpuidB1_edx; // unused currently

  // ecx = 2
  uint32_t     tpl_cpuidB2_eax;
  TplCpuidBEbx tpl_cpuidB2_ebx;
  uint32_t     tpl_cpuidB2_ecx; // unused currently
  uint32_t     tpl_cpuidB2_edx; // unused currently

  // cpuid function 0x80000000 // example, unused
  uint32_t ext_max_function;
  uint32_t ext_vendor_name_0;
  uint32_t ext_vendor_name_1;
  uint32_t ext_vendor_name_2;

  // cpuid function 0x80000001
  uint32_t     ext_cpuid1_eax; // reserved
  uint32_t     ext_cpuid1_ebx; // reserved
  ExtCpuid1Ecx ext_cpuid1_ecx;
  ExtCpuid1Edx ext_cpuid1_edx;

  // cpuid functions 0x80000002 thru 0x80000004: example, unused
  uint32_t proc_name_0, proc_name_1, proc_name_2, proc_name_3;
  uint32_t proc_name_4, proc_name_5, proc_name_6, proc_name_7;
  uint32_t proc_name_8, proc_name_9, proc_name_10,proc_name_11;

  // cpuid function 0x80000005 // AMD L1, Intel reserved
  uint32_t     ext_cpuid5_eax; // unused currently
  uint32_t     ext_cpuid5_ebx; // reserved
  ExtCpuid5Ex  ext_cpuid5_ecx; // L1 data cache info (AMD)
  ExtCpuid5Ex  ext_cpuid5_edx; // L1 instruction cache info (AMD)

  // cpuid function 0x80000007
  uint32_t     ext_cpuid7_eax; // reserved
  uint32_t     ext_cpuid7_ebx; // reserved
  uint32_t     ext_cpuid7_ecx; // reserved
  ExtCpuid7Edx ext_cpuid7_edx; // tscinv

  // cpuid function 0x80000008
  uint32_t     ext_cpuid8_eax; // unused currently
  uint32_t     ext_cpuid8_ebx; // reserved
  ExtCpuid8Ecx ext_cpuid8_ecx;
  uint32_t     ext_cpuid8_edx; // reserved

  // cpuid function 0x8000001E // AMD 17h
  uint32_t      ext_cpuid1E_eax;
  ExtCpuid1EEbx ext_cpuid1E_ebx; // threads per core (AMD17h)
  uint32_t      ext_cpuid1E_ecx;
  uint32_t      ext_cpuid1E_edx; // unused currently

  // extended control register XCR0 (the XFEATURE_ENABLED_MASK register)
  XemXcr0Eax   xem_xcr0_eax;
  uint32_t     xem_xcr0_edx; // reserved

  // Space to save ymm registers after signal handling
  int          ymm_save[8*4]; // Save ymm0, ymm7, ymm8, ymm15

  // Space to save zmm registers after signal handling
  int          zmm_save[16*4]; // Save zmm0, zmm7, zmm8, zmm31

  // Space to save apx registers after signal handling
  jlong        apx_save[2]; // Save r16 and r31

  // Declared here, defined elsewhere. Returns a VM_Features value;
  // presumably derived from the cpuid data above -- confirm in the
  // implementation.
  VM_Features feature_flags() const;

  // Asserts
  // A zero base family field is taken to mean cpuid function 1 has not
  // been recorded yet.
  void assert_is_initialized() const {
    assert(std_cpuid1_eax.bits.family != 0, "VM_Version not initialized");
  }

  // Extractors
  // Family number: base family plus extended family, from cpuid function 1 EAX.
  uint32_t extended_cpu_family() const {
    uint32_t result = std_cpuid1_eax.bits.family;
    result += std_cpuid1_eax.bits.ext_family;
    return result;
  }

  // Model number: extended model forms the high nibble, base model the low
  // nibble.
  uint32_t extended_cpu_model() const {
    uint32_t result = std_cpuid1_eax.bits.model;
    result |= std_cpuid1_eax.bits.ext_model << 4;
    return result;
  }

  // Stepping field of cpuid function 1 EAX.
  uint32_t cpu_stepping() const {
    uint32_t result = std_cpuid1_eax.bits.stepping;
    return result;
  }
};
714
715 private:
716 // The actual cpuid info block
717 static CpuidInfo _cpuid_info;
718
719 // Extractors and predicates
720 static uint logical_processor_count() {
721 uint result = threads_per_core();
722 return result;
723 }
724
// The functions below are only declared here; their definitions are not
// part of this header section.

// NOTE(review): presumably computes the value cached in
// _has_intel_jcc_erratum -- confirm in the implementation.
static bool compute_has_intel_jcc_erratum();

// NOTE(review): by their names these report whether the operating system
// supports AVX vector state and APX extended general purpose registers
// respectively -- confirm in the implementation.
static bool os_supports_avx_vectors();
static bool os_supports_apx_egprs();

// NOTE(review): presumably fills in the feature vectors from _cpuid_info --
// confirm in the implementation.
static void get_processor_features();
730
731 public:
732 // Offsets for cpuid asm stub
// Each accessor returns the byte offset of the named CpuidInfo field from
// the start of CpuidInfo. For the cpuid sub-blocks this is the first field
// stored for that leaf.
static ByteSize std_cpuid0_offset() { return byte_offset_of(CpuidInfo, std_max_function); }
static ByteSize std_cpuid1_offset() { return byte_offset_of(CpuidInfo, std_cpuid1_eax); }
static ByteSize std_cpuid24_offset() { return byte_offset_of(CpuidInfo, std_cpuid24_eax); }
static ByteSize std_cpuid29_offset() { return byte_offset_of(CpuidInfo, std_cpuid29_ebx); }
static ByteSize dcp_cpuid4_offset() { return byte_offset_of(CpuidInfo, dcp_cpuid4_eax); }
static ByteSize sef_cpuid7_offset() { return byte_offset_of(CpuidInfo, sef_cpuid7_eax); }
static ByteSize sefsl1_cpuid7_offset() { return byte_offset_of(CpuidInfo, sefsl1_cpuid7_eax); }
static ByteSize ext_cpuid1_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1_eax); }
static ByteSize ext_cpuid5_offset() { return byte_offset_of(CpuidInfo, ext_cpuid5_eax); }
static ByteSize ext_cpuid7_offset() { return byte_offset_of(CpuidInfo, ext_cpuid7_eax); }
static ByteSize ext_cpuid8_offset() { return byte_offset_of(CpuidInfo, ext_cpuid8_eax); }
static ByteSize ext_cpuid1E_offset() { return byte_offset_of(CpuidInfo, ext_cpuid1E_eax); }
static ByteSize tpl_cpuidB0_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB0_eax); }
static ByteSize tpl_cpuidB1_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB1_eax); }
static ByteSize tpl_cpuidB2_offset() { return byte_offset_of(CpuidInfo, tpl_cpuidB2_eax); }
// Offsets of the XCR0 copy and of the register save areas.
static ByteSize xem_xcr0_offset() { return byte_offset_of(CpuidInfo, xem_xcr0_eax); }
static ByteSize ymm_save_offset() { return byte_offset_of(CpuidInfo, ymm_save); }
static ByteSize zmm_save_offset() { return byte_offset_of(CpuidInfo, zmm_save); }
static ByteSize apx_save_offset() { return byte_offset_of(CpuidInfo, apx_save); }
752
// The value used to check the ymm registers after signal handling: the
// 32-bit pattern 0xCAFEBABE.
static int ymm_test_value() { return 0xCAFEBABE; }
// 64-bit counterpart: the same pattern repeated twice.
// NOTE(review): presumably compared against the r16/r31 values kept in
// apx_save -- confirm against the caller.
static jlong egpr_test_value() { return 0xCAFEBABECAFEBABELL; }
756
// Declared here; defined elsewhere.
static void get_cpu_info_wrapper();

// Accessors for the recorded address of the instruction that causes the
// SEGV (_cpuinfo_segv_addr) and of the instruction following it
// (_cpuinfo_cont_addr). is_cpuinfo_segv_addr() tests a pc for equality with
// the recorded faulting address.
static void set_cpuinfo_segv_addr(address pc) { _cpuinfo_segv_addr = pc; }
static bool is_cpuinfo_segv_addr(address pc) { return _cpuinfo_segv_addr == pc; }
static void set_cpuinfo_cont_addr(address pc) { _cpuinfo_cont_addr = pc; }
static address cpuinfo_cont_addr() { return _cpuinfo_cont_addr; }

// Same four accessors for the APX specific SEGV addresses.
static void set_cpuinfo_segv_addr_apx(address pc) { _cpuinfo_segv_addr_apx = pc; }
static bool is_cpuinfo_segv_addr_apx(address pc) { return _cpuinfo_segv_addr_apx == pc; }
static void set_cpuinfo_cont_addr_apx(address pc) { _cpuinfo_cont_addr_apx = pc; }
static address cpuinfo_cont_addr_apx() { return _cpuinfo_cont_addr_apx; }

// Declared here; defined elsewhere.
static void clear_apx_test_state();
769
770 static void clean_cpuFeatures() {
771 VM_Version::clear_cpu_features();
772 }
773 static void set_avx_cpuFeatures() {
774 _features.set_feature(CPU_SSE);
775 _features.set_feature(CPU_SSE2);
776 _features.set_feature(CPU_AVX);
777 _features.set_feature(CPU_VZEROUPPER);
778 }
779 static void set_evex_cpuFeatures() {
780 _features.set_feature(CPU_AVX10_1);
781 _features.set_feature(CPU_AVX512F);
782 _features.set_feature(CPU_SSE);
783 _features.set_feature(CPU_SSE2);
784 _features.set_feature(CPU_VZEROUPPER);
785 }
786 static void set_apx_cpuFeatures() {
787 _features.set_feature(CPU_APX_F);
788 }
789 static void set_bmi_cpuFeatures() {
790 _features.set_feature(CPU_BMI1);
791 _features.set_feature(CPU_BMI2);
792 _features.set_feature(CPU_LZCNT);
793 _features.set_feature(CPU_POPCNT);
794 }
795
796 // Initialization
// Declared here; the definition is not part of this header section.
static void initialize();

// Override Abstract_VM_Version implementation
static void print_platform_virtualization_info(outputStream*);
801
802 //
803 // Processor family:
804 // 3 - 386
805 // 4 - 486
806 // 5 - Pentium
807 // 6 - PentiumPro, Pentium II, Celeron, Xeon, Pentium III, Athlon,
808 // Pentium M, Core Solo, Core Duo, Core2 Duo
809 // family 6 model: 9, 13, 14, 15
810 // 0x0f - Pentium 4, Opteron
811 //
812 // Note: The cpu family should be used to select between
813 // instruction sequences which are valid on all Intel
814 // processors. Use the feature test functions below to
815 // determine whether a particular instruction is supported.
816 //
// Static conveniences that forward to the CpuidInfo instance _cpuid_info.
static void assert_is_initialized() { _cpuid_info.assert_is_initialized(); }
static uint32_t extended_cpu_family() { return _cpuid_info.extended_cpu_family(); }
static uint32_t extended_cpu_model() { return _cpuid_info.extended_cpu_model(); }
static uint32_t cpu_stepping() { return _cpuid_info.cpu_stepping(); }
// Unlike the accessors above, this returns the cached _cpu value rather
// than reading _cpuid_info.
static int cpu_family() { return _cpu;}
822 static bool is_P6() { return cpu_family() >= 6; }
823 static bool is_intel_server_family() { return cpu_family() == 6 || cpu_family() == 19; }
824 static bool is_amd() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x68747541; } // 'htuA'
825 static bool is_hygon() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x6F677948; } // 'ogyH'
826 static bool is_amd_family() { return is_amd() || is_hygon(); }
827 static bool is_intel() { assert_is_initialized(); return _cpuid_info.std_vendor_name_0 == 0x756e6547; } // 'uneG'
828 static bool is_zx() { assert_is_initialized(); return (_cpuid_info.std_vendor_name_0 == 0x746e6543) || (_cpuid_info.std_vendor_name_0 == 0x68532020); } // 'tneC'||'hS '
829 static bool is_atom_family() { return ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x36) || (extended_cpu_model() == 0x37) || (extended_cpu_model() == 0x4D))); } //Silvermont and Centerton
830 static bool is_knights_family() { return UseKNLSetting || ((cpu_family() == 0x06) && ((extended_cpu_model() == 0x57) || (extended_cpu_model() == 0x85))); } // Xeon Phi 3200/5200/7200 and Future Xeon Phi
831
832 static bool supports_processor_topology() {
833 return (_cpuid_info.std_max_function >= 0xB) &&
834 // eax[4:0] | ebx[0:15] == 0 indicates invalid topology level.
835 // Some cpus have max cpuid >= 0xB but do not support processor topology.
836 (((_cpuid_info.tpl_cpuidB0_eax & 0x1f) | _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus) != 0);
837 }
838
// Declared here; defined elsewhere.
// NOTE(review): by their names these return the core count per package,
// the thread count per core and the L1 cache line size -- confirm units
// and sources in the implementation.
static uint cores_per_cpu();
static uint threads_per_core();
static uint L1_line_size();

// Prefetch granularity; identical to L1_line_size().
static uint prefetch_data_size() {
  return L1_line_size();
}
846
847 //
848 // Feature identification which can be affected by VM settings
849 //
// Each accessor tests one flag in _features unless noted otherwise.
static bool supports_cmov() { return _features.supports_feature(CPU_CMOV); }
static bool supports_fxsr() { return _features.supports_feature(CPU_FXSR); }
static bool supports_ht() { return _features.supports_feature(CPU_HT); }
static bool supports_mmx() { return _features.supports_feature(CPU_MMX); }
static bool supports_sse() { return _features.supports_feature(CPU_SSE); }
static bool supports_sse2() { return _features.supports_feature(CPU_SSE2); }
static bool supports_sse3() { return _features.supports_feature(CPU_SSE3); }
static bool supports_ssse3() { return _features.supports_feature(CPU_SSSE3); }
static bool supports_sse4_1() { return _features.supports_feature(CPU_SSE4_1); }
static bool supports_sse4_2() { return _features.supports_feature(CPU_SSE4_2); }
static bool supports_popcnt() { return _features.supports_feature(CPU_POPCNT); }
static bool supports_avx() { return _features.supports_feature(CPU_AVX); }
static bool supports_avx2() { return _features.supports_feature(CPU_AVX2); }
static bool supports_tsc() { return _features.supports_feature(CPU_TSC); }
static bool supports_rdtscp() { return _features.supports_feature(CPU_RDTSCP); }
static bool supports_rdpid() { return _features.supports_feature(CPU_RDPID); }
static bool supports_aes() { return _features.supports_feature(CPU_AES); }
static bool supports_erms() { return _features.supports_feature(CPU_ERMS); }
static bool supports_fsrm() { return _features.supports_feature(CPU_FSRM); }
static bool supports_clmul() { return _features.supports_feature(CPU_CLMUL); }
static bool supports_rtm() { return _features.supports_feature(CPU_RTM); }
static bool supports_bmi1() { return _features.supports_feature(CPU_BMI1); }
static bool supports_bmi2() { return _features.supports_feature(CPU_BMI2); }
static bool supports_adx() { return _features.supports_feature(CPU_ADX); }
// "EVEX" support is keyed off the AVX512F flag.
static bool supports_evex() { return _features.supports_feature(CPU_AVX512F); }
static bool supports_avx512dq() { return _features.supports_feature(CPU_AVX512DQ); }
static bool supports_avx512ifma() { return _features.supports_feature(CPU_AVX512_IFMA); }
static bool supports_avxifma() { return _features.supports_feature(CPU_AVX_IFMA); }
static bool supports_avx512pf() { return _features.supports_feature(CPU_AVX512PF); }
static bool supports_avx512er() { return _features.supports_feature(CPU_AVX512ER); }
static bool supports_avx512cd() { return _features.supports_feature(CPU_AVX512CD); }
static bool supports_avx512bw() { return _features.supports_feature(CPU_AVX512BW); }
static bool supports_avx512vl() { return _features.supports_feature(CPU_AVX512VL); }
// Combined predicates: EVEX together with all of the AVX-512 subsets named
// in the function name.
static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); }
static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
                                              supports_avx512bw() && supports_avx512dq()); }
// EVEX present but the named subset missing.
static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
// AVX2 without EVEX.
static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
static bool supports_apx_f() { return _features.supports_feature(CPU_APX_F); }
// AVX or AVX2 without EVEX.
static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
static bool supports_sha() { return _features.supports_feature(CPU_SHA); }
// FMA is only reported when AVX is supported as well.
static bool supports_fma() { return _features.supports_feature(CPU_FMA) && supports_avx(); }
static bool supports_vzeroupper() { return _features.supports_feature(CPU_VZEROUPPER); }
static bool supports_avx512_vpopcntdq() { return _features.supports_feature(CPU_AVX512_VPOPCNTDQ); }
static bool supports_avx512_vpclmulqdq() { return _features.supports_feature(CPU_AVX512_VPCLMULQDQ); }
static bool supports_avx512_vaes() { return _features.supports_feature(CPU_AVX512_VAES); }
static bool supports_gfni() { return _features.supports_feature(CPU_GFNI); }
static bool supports_avx512_vnni() { return _features.supports_feature(CPU_AVX512_VNNI); }
static bool supports_avx512_bitalg() { return _features.supports_feature(CPU_AVX512_BITALG); }
static bool supports_avx512_vbmi() { return _features.supports_feature(CPU_AVX512_VBMI); }
static bool supports_avx512_vbmi2() { return _features.supports_feature(CPU_AVX512_VBMI2); }
static bool supports_avx512_fp16() { return _features.supports_feature(CPU_AVX512_FP16); }
static bool supports_hv() { return _features.supports_feature(CPU_HV); }
static bool supports_serialize() { return _features.supports_feature(CPU_SERIALIZE); }
static bool supports_hybrid() { return _features.supports_feature(CPU_HYBRID); }
static bool supports_f16c() { return _features.supports_feature(CPU_F16C); }
static bool supports_pku() { return _features.supports_feature(CPU_PKU); }
static bool supports_ospke() { return _features.supports_feature(CPU_OSPKE); }
static bool supports_cet_ss() { return _features.supports_feature(CPU_CET_SS); }
static bool supports_cet_ibt() { return _features.supports_feature(CPU_CET_IBT); }
static bool supports_sha512() { return _features.supports_feature(CPU_SHA512); }
914
// Intel(R) AVX10 introduces a versioned approach for enumeration that is monotonically increasing, inclusive,
916 // and supporting all vector lengths. Feature set supported by an AVX10 vector ISA version is also supported
917 // by all the versions above it.
// Each accessor tests the corresponding AVX10 version flag in _features.
static bool supports_avx10_1() { return _features.supports_feature(CPU_AVX10_1);}
static bool supports_avx10_2() { return _features.supports_feature(CPU_AVX10_2);}

//
// Feature identification not affected by VM flags
//
// Tests AVX512F in _cpu_features (the original CPU flags) instead of
// _features, which is what supports_evex() consults.
static bool cpu_supports_evex() { return _cpu_features.supports_feature(CPU_AVX512F); }
925
926 static bool supports_avx512_simd_sort() {
927 if (supports_avx512dq()) {
928 // Disable AVX512 version of SIMD Sort on AMD Zen4 Processors.
929 if (is_amd() && cpu_family() == CPU_FAMILY_AMD_19H) {
930 return false;
931 }
932 return true;
933 }
934 return false;
935 }
936
937 // Intel features
938 static bool is_intel_family_core() { return is_intel() &&
939 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
940
941 static bool is_intel_skylake() { return is_intel_family_core() &&
942 extended_cpu_model() == CPU_MODEL_SKYLAKE; }
943
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
// Declared only when COMPILER2 is defined; the definition is not in this header.
static bool is_default_intel_cascade_lake();
#endif
948
// The functions below are only declared here; their definitions live
// outside this header.

// NOTE(review): presumably identifies Intel Cascade Lake CPUs - confirm in the definition.
static bool is_intel_cascade_lake();

// NOTE(review): presumably identifies Intel Darkmont CPUs - confirm in the definition.
static bool is_intel_darkmont();

// NOTE(review): name suggests a threshold related to AVX3 usage; units and
// meaning are not visible here - confirm in the definition.
static int avx3_threshold();

// NOTE(review): name suggests a check that Intel TSCs are synchronized at
// initialization - confirm in the definition.
static bool is_intel_tsc_synched_at_init();

// Takes a feature set and a stringStream; NOTE(review): presumably writes the
// names of the features in 'features' to 'ss' - confirm in the definition.
static void insert_features_names(VM_Version::VM_Features features, stringStream& ss);
958
959 // This checks if the JVM is potentially affected by an erratum on Intel CPUs (SKX102)
960 // that causes unpredictable behaviour when jcc crosses 64 byte boundaries. Its microcode
961 // mitigation causes regressions when jumps or fused conditional branches cross or end at
962 // 32 byte boundaries.
963 static bool has_intel_jcc_erratum() { return _has_intel_jcc_erratum; }
964
965 // AMD features
966 static bool supports_3dnow_prefetch() { return _features.supports_feature(CPU_3DNOW_PREFETCH); }
967 static bool supports_lzcnt() { return _features.supports_feature(CPU_LZCNT); }
968 static bool supports_sse4a() { return _features.supports_feature(CPU_SSE4A); }
969
970 static bool is_amd_Barcelona() { return is_amd() &&
971 extended_cpu_family() == CPU_FAMILY_AMD_11H; }
972
973 // Intel and AMD newer cores support fast timestamps well
974 static bool supports_tscinv_bit() {
975 return _features.supports_feature(CPU_TSCINV_BIT);
976 }
977 static bool supports_tscinv() {
978 return _features.supports_feature(CPU_TSCINV);
979 }
980
981 // Intel Core and newer cpus have fast IDIV instruction (excluding Atom).
982 static bool has_fast_idiv() { return is_intel() && is_intel_server_family() &&
983 supports_sse3() && _model != 0x1C; }
984
// Compare-and-exchange is reported as supported unconditionally.
static bool supports_compare_and_exchange() {
  return true;
}
986
// Declared here, defined elsewhere.
// NOTE(review): presumably returns the allocation prefetch distance, with
// 'use_watermark_prefetch' selecting the prefetch style - confirm in the definition.
static int allocate_prefetch_distance(bool use_watermark_prefetch);
988
989 // SSE2 and later processors implement a 'pause' instruction
990 // that can be used for efficient implementation of
991 // the intrinsic for java.lang.Thread.onSpinWait()
992 static bool supports_on_spin_wait() { return supports_sse2(); }
993
// x86_64 supports fast class initialization checks, so this is always true.
static bool supports_fast_class_init_checks() { return true; }
998
// x86_64 supports secondary supers table; compile-time constant true.
constexpr static bool supports_secondary_supers_table() { return true; }
1003
// Stack watermark barriers are reported as supported; compile-time constant true.
constexpr static bool supports_stack_watermark_barrier() { return true; }
1007
// Recursive lightweight locking is reported as supported; compile-time constant true.
constexpr static bool supports_recursive_lightweight_locking() { return true; }
1011
1012 // For AVX CPUs only. f16c support is disabled if UseAVX == 0.
1013 static bool supports_float16() {
1014 return supports_f16c() || supports_avx512vl() || supports_avx512_fp16();
1015 }
1016
// Check intrinsic support
// Declared here, defined elsewhere; takes the vmIntrinsicID to be checked.
static bool is_intrinsic_supported(vmIntrinsicID id);
1019
// There are several insns to force cache line sync to memory which
// we can use to ensure mapped non-volatile memory is up to date with
// pending in-cache changes.
//
// 64 bit cpus always support clflush, which writes back and evicts.
// On 32 bit cpus, support is recorded via a feature flag.
//
// clflushopt is optional and acts like clflush except it does
// not synchronize with other memory ops. It needs a preceding
// and trailing StoreStore fence.
//
// clwb is an optional intel-specific instruction which
// writes back without evicting the line. It also does not
// synchronize with other memory ops, so it needs preceding
// and trailing StoreStore fences.

static bool supports_clflush(); // Can't inline due to header file conflict
1037
1038 // Note: CPU_FLUSHOPT and CPU_CLWB bits should always be zero for 32-bit
1039 static bool supports_clflushopt() { return (_features.supports_feature(CPU_FLUSHOPT)); }
1040 static bool supports_clwb() { return (_features.supports_feature(CPU_CLWB)); }
1041
1042 // Old CPUs perform lea on AGU which causes additional latency transferring the
1043 // value from/to ALU for other operations
1044 static bool supports_fast_2op_lea() {
1045 return (is_intel() && supports_avx()) || // Sandy Bridge and above
1046 (is_amd() && supports_avx()); // Jaguar and Bulldozer and above
1047 }
1048
1049 // Pre Icelake Intels suffer inefficiency regarding 3-operand lea, which contains
1050 // all of base register, index register and displacement immediate, with 3 latency.
1051 // Note that when the address contains no displacement but the base register is
1052 // rbp or r13, the machine code must contain a zero displacement immediate,
1053 // effectively transform a 2-operand lea into a 3-operand lea. This can be
1054 // replaced by add-add or lea-add
1055 static bool supports_fast_3op_lea() {
1056 return supports_fast_2op_lea() &&
1057 ((is_intel() && supports_clwb() && !is_intel_skylake()) || // Icelake and above
1058 is_amd());
1059 }
1060
#ifdef __APPLE__
// Is the CPU running emulated (for example macOS Rosetta running x86_64 code on M1 ARM (aarch64))?
// Declared only when __APPLE__ is defined; the definition is not in this header.
static bool is_cpu_emulated();
#endif
1065
1066 // support functions for virtualization detection
1067 private:
1068 static void check_virtualizations();
1069
1070 static const char* cpu_family_description(void);
1071 static const char* cpu_model_description(void);
1072 static const char* cpu_brand(void);
1073 static const char* cpu_brand_string(void);
1074
1075 static int cpu_type_description(char* const buf, size_t buf_len);
1076 static int cpu_detailed_description(char* const buf, size_t buf_len);
1077 static int cpu_extended_brand_string(char* const buf, size_t buf_len);
1078
1079 static bool cpu_is_em64t(void);
1080 static bool is_netburst(void);
1081
1082 // Returns bytes written excluding termninating null byte.
1083 static size_t cpu_write_support_string(char* const buf, size_t buf_len);
1084 static void resolve_cpu_information_details(void);
1085 static int64_t max_qualified_cpu_freq_from_brand_string(void);
1086
1087 public:
1088 // Offsets for cpuid asm stub brand string
1089 static ByteSize proc_name_0_offset() { return byte_offset_of(CpuidInfo, proc_name_0); }
1090 static ByteSize proc_name_1_offset() { return byte_offset_of(CpuidInfo, proc_name_1); }
1091 static ByteSize proc_name_2_offset() { return byte_offset_of(CpuidInfo, proc_name_2); }
1092 static ByteSize proc_name_3_offset() { return byte_offset_of(CpuidInfo, proc_name_3); }
1093 static ByteSize proc_name_4_offset() { return byte_offset_of(CpuidInfo, proc_name_4); }
1094 static ByteSize proc_name_5_offset() { return byte_offset_of(CpuidInfo, proc_name_5); }
1095 static ByteSize proc_name_6_offset() { return byte_offset_of(CpuidInfo, proc_name_6); }
1096 static ByteSize proc_name_7_offset() { return byte_offset_of(CpuidInfo, proc_name_7); }
1097 static ByteSize proc_name_8_offset() { return byte_offset_of(CpuidInfo, proc_name_8); }
1098 static ByteSize proc_name_9_offset() { return byte_offset_of(CpuidInfo, proc_name_9); }
1099 static ByteSize proc_name_10_offset() { return byte_offset_of(CpuidInfo, proc_name_10); }
1100 static ByteSize proc_name_11_offset() { return byte_offset_of(CpuidInfo, proc_name_11); }
1101
1102 static int64_t maximum_qualified_cpu_frequency(void);
1103
1104 static bool supports_tscinv_ext(void);
1105
1106 static void initialize_cpu_information(void);
1107
// The functions below are only declared here; their definitions live
// outside this header.

// NOTE(review): presumably write the names of the features (respectively the
// missing features) described by 'features_buffer' to 'ss' - confirm in the definitions.
static void get_cpu_features_name(void* features_buffer, stringStream& ss);
static void get_missing_features_name(void* features_buffer, stringStream& ss);

// Returns number of bytes required to store cpu features representation
static int cpu_features_size();

// Stores cpu features representation in the provided buffer. This representation is arch dependent.
// Size of the buffer must be same as returned by cpu_features_size()
static void store_cpu_features(void* buf);

// NOTE(review): presumably tests whether the features encoded in
// 'features_to_test' (same representation as store_cpu_features) are
// supported - confirm in the definition.
static bool supports_features(void* features_to_test);
1119 };
1120
1121 #endif // CPU_X86_VM_VERSION_X86_HPP