1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/codeBlob.hpp"
29 #include "compiler/compilerDefinitions.inline.hpp"
30 #include "jvm.h"
31 #include "logging/log.hpp"
32 #include "logging/logStream.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "runtime/globals_extension.hpp"
36 #include "runtime/java.hpp"
37 #include "runtime/os.inline.hpp"
38 #include "runtime/stubCodeGenerator.hpp"
39 #include "runtime/vm_version.hpp"
40 #include "utilities/checkedCast.hpp"
41 #include "utilities/ostream.hpp"
42 #include "utilities/powerOfTwo.hpp"
43 #include "utilities/virtualizationSupport.hpp"
44
// Processor identity, filled in by get_processor_features() from raw CPUID data.
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID leaf dump, populated by the generated get_cpu_info stub.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into a parallel array of printable feature names.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Backing blob for the feature-detection stubs generated below.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// Size of the features bitmap, in 64-bit words.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

// _features may be trimmed by VM flags; _cpu_features preserves the raw hardware set.
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// Entry-point signatures of the generated stubs.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
82
// Returns true unconditionally on x86_64; asserts (debug builds only) that the
// CPU really reported CLFLUSH once feature detection has completed.
bool VM_Version::supports_clflush() {
  // clflush should always be available on x86_64
  // if not we are in real trouble because we rely on it
  // to flush the code cache.
  // Unfortunately, Assembler::clflush is currently called as part
  // of generation of the code cache flush routine. This happens
  // under Universe::init before the processor features are set
  // up. Assembler::flush calls this routine to check that clflush
  // is allowed. So, we give the caller a free pass if Universe init
  // is still in progress.
  assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
  return true;
}
96
// CPUID leaf numbers: standard leaves (EAX input < 0x80000000) ...
#define CPUID_STANDARD_FN 0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// ... and extended leaves (0x80000002..4 hold the processor brand string).
#define CPUID_EXTENDED_FN 0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
109
// Generates the hand-written assembly stubs used during VM bootstrap to probe
// the processor: raw CPUID leaf collection (generate_get_cpu_info), a generic
// CPUID query for virtualization detection (generate_detect_virt), the brand
// string reader (generate_getCPUIDBrandString), and an APX extended-GPR
// clearing helper (clear_apx_test_state). These run before processor features
// are finalized, so they temporarily force UseAVX/UseSSE/UseAPX settings to
// satisfy assembler asserts and restore them before returning.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emits a stub that zeroes extended GPRs r16 and r31. Called around signal
  // handling so that any surviving test values in those registers must have
  // been restored by the OS (see the APX save/restore probe below).
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Emits the main CPU-detection stub: walks the CPUID leaves, stores raw
  // register dumps into the CpuidInfo struct passed in c_rarg0, and performs
  // the deliberate-SEGV save/restore probes for YMM/ZMM and APX state.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    // Preserve the full leaf result in rax around the validity test below.
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX probe: load a known test value into r16/r31, take a deliberate SEGV
    // (null read), and after the signal handler returns check that the OS
    // restored the extended GPR state.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    //
    // Query CPUID 0xD.19 for APX XSAVE offset
    // Extended State Enumeration Sub-leaf 19 (APX)
    // EAX = size of APX state (should be 128)
    // EBX = offset in standard XSAVE format
    //
    __ movl(rax, 0xD);
    __ movl(rcx, 19);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
    __ movl(Address(rsi, 0), rbx);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // jump if XCR0 sse|ymm state is OS-enabled

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    // Deliberate SEGV (null read); the signal handler resumes at the
    // continuation address so we can inspect vector state afterwards.
    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore in reverse order of the saves above.
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore in reverse order of the saves above.
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };

  // Emits vzeroupper (via the uncached encoding) unless running on a
  // Xeon Phi family part, where it is skipped.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }

  // Emits a stub that executes CPUID for an arbitrary leaf (first argument)
  // and stores eax/ebx/ecx/edx into the caller-supplied array; used for
  // hypervisor/virtualization detection.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Emits a stub that reads the 48-byte processor brand string via extended
  // CPUID leaves 0x80000002..0x80000004 into the CpuidInfo proc_name fields.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
874
875 void VM_Version::get_processor_features() {
876
877 _cpu = 4; // 486 by default
878 _model = 0;
879 _stepping = 0;
880 _logical_processors_per_package = 1;
881 // i486 internal cache is both I&D and has a 16-byte line size
882 _L1_data_cache_line_size = 16;
883
884 // Get raw processor info
885
886 get_cpu_info_stub(&_cpuid_info);
887
888 assert_is_initialized();
889 _cpu = extended_cpu_family();
890 _model = extended_cpu_model();
891 _stepping = cpu_stepping();
892
893 if (cpu_family() > 4) { // it supports CPUID
894 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
895 _cpu_features = _features; // Preserve features
896 // Logical processors are only available on P4s and above,
897 // and only if hyperthreading is available.
898 _logical_processors_per_package = logical_processor_count();
899 _L1_data_cache_line_size = L1_line_size();
900 }
901
902 // xchg and xadd instructions
903 _supports_atomic_getset4 = true;
904 _supports_atomic_getadd4 = true;
905 _supports_atomic_getset8 = true;
906 _supports_atomic_getadd8 = true;
907
908 // OS should support SSE for x64 and hardware should support at least SSE2.
909 if (!VM_Version::supports_sse2()) {
910 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
911 }
912 // in 64 bit the use of SSE2 is the minimum
913 if (UseSSE < 2) UseSSE = 2;
914
915 // flush_icache_stub have to be generated first.
916 // That is why Icache line size is hard coded in ICache class,
917 // see icache_x86.hpp. It is also the reason why we can't use
918 // clflush instruction in 32-bit VM since it could be running
919 // on CPU which does not support it.
920 //
921 // The only thing we can do is to verify that flushed
922 // ICache::line_size has correct value.
923 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
924 // clflush_size is size in quadwords (8 bytes).
925 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
926
  // Assigning this field effectively enables Unsafe.writebackMemory()
  // by initializing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero.
  // That is only implemented on x86_64 and only if the OS plays ball.
  if (os::supports_map_sync()) {
    // Publish data cache line flush size to the generic field, otherwise
    // let it default to zero thereby disabling writeback.
    // clflush_size is reported in quadwords, hence the scaling by 8 (bytes).
    _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
  }
935
936 // Check if processor has Intel Ecore
937 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
938 (supports_hybrid() ||
939 _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
940 _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
941 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
942 }
943
944 if (UseSSE < 4) {
945 _features.clear_feature(CPU_SSE4_1);
946 _features.clear_feature(CPU_SSE4_2);
947 }
948
949 if (UseSSE < 3) {
950 _features.clear_feature(CPU_SSE3);
951 _features.clear_feature(CPU_SSSE3);
952 _features.clear_feature(CPU_SSE4A);
953 }
954
955 if (UseSSE < 2)
956 _features.clear_feature(CPU_SSE2);
957
958 if (UseSSE < 1)
959 _features.clear_feature(CPU_SSE);
960
961 // ZX cpus specific settings
962 if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
963 if (cpu_family() == 7) {
964 if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
965 UseAVX = 1;
966 } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
967 UseAVX = 0;
968 }
969 } else if (cpu_family() == 6) {
970 UseAVX = 0;
971 }
972 }
973
974 // UseSSE is set to the smaller of what hardware supports and what
975 // the command line requires. I.e., you cannot set UseSSE to 2 on
976 // older Pentiums which do not support it.
977 int use_sse_limit = 0;
978 if (UseSSE > 0) {
979 if (UseSSE > 3 && supports_sse4_1()) {
980 use_sse_limit = 4;
981 } else if (UseSSE > 2 && supports_sse3()) {
982 use_sse_limit = 3;
983 } else if (UseSSE > 1 && supports_sse2()) {
984 use_sse_limit = 2;
985 } else if (UseSSE > 0 && supports_sse()) {
986 use_sse_limit = 1;
987 } else {
988 use_sse_limit = 0;
989 }
990 }
991 if (FLAG_IS_DEFAULT(UseSSE)) {
992 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
993 } else if (UseSSE > use_sse_limit) {
994 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
995 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
996 }
997
998 // first try initial setting and detect what we can support
999 int use_avx_limit = 0;
1000 if (UseAVX > 0) {
1001 if (UseSSE < 4) {
1002 // Don't use AVX if SSE is unavailable or has been disabled.
1003 use_avx_limit = 0;
1004 } else if (UseAVX > 2 && supports_evex()) {
1005 use_avx_limit = 3;
1006 } else if (UseAVX > 1 && supports_avx2()) {
1007 use_avx_limit = 2;
1008 } else if (UseAVX > 0 && supports_avx()) {
1009 use_avx_limit = 1;
1010 } else {
1011 use_avx_limit = 0;
1012 }
1013 }
1014 if (FLAG_IS_DEFAULT(UseAVX)) {
1015 // Don't use AVX-512 on older Skylakes unless explicitly requested.
1016 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1017 FLAG_SET_DEFAULT(UseAVX, 2);
1018 } else {
1019 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1020 }
1021 }
1022
1023 if (UseAVX > use_avx_limit) {
1024 if (UseSSE < 4) {
1025 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1026 } else {
1027 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1028 }
1029 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1030 }
1031
  // Strip feature bits that exceed the negotiated UseAVX level so that the
  // rest of the VM never sees a feature it is not allowed to use.
  // UseAVX < 3: drop everything that needs EVEX encoding (AVX-512 families,
  // AVX10, FP16) as well as APX, which is only enabled alongside AVX512VL.
  if (UseAVX < 3) {
    _features.clear_feature(CPU_AVX512F);
    _features.clear_feature(CPU_AVX512DQ);
    _features.clear_feature(CPU_AVX512CD);
    _features.clear_feature(CPU_AVX512BW);
    _features.clear_feature(CPU_AVX512ER);
    _features.clear_feature(CPU_AVX512PF);
    _features.clear_feature(CPU_AVX512VL);
    _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
    _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
    _features.clear_feature(CPU_AVX512_VAES);
    _features.clear_feature(CPU_AVX512_VNNI);
    _features.clear_feature(CPU_AVX512_VBMI);
    _features.clear_feature(CPU_AVX512_VBMI2);
    _features.clear_feature(CPU_AVX512_BITALG);
    _features.clear_feature(CPU_AVX512_IFMA);
    _features.clear_feature(CPU_APX_F);
    _features.clear_feature(CPU_AVX512_FP16);
    _features.clear_feature(CPU_AVX10_1);
    _features.clear_feature(CPU_AVX10_2);
  }


  // UseAVX < 2: drop AVX2 and its companions.
  if (UseAVX < 2) {
    _features.clear_feature(CPU_AVX2);
    _features.clear_feature(CPU_AVX_IFMA);
  }

  // UseAVX < 1: drop base AVX and features that only exist with VEX encoding.
  if (UseAVX < 1) {
    _features.clear_feature(CPU_AVX);
    _features.clear_feature(CPU_VZEROUPPER);
    _features.clear_feature(CPU_F16C);
    _features.clear_feature(CPU_SHA512);
  }
1066
1067 if (logical_processors_per_package() == 1) {
1068 // HT processor could be installed on a system which doesn't support HT.
1069 _features.clear_feature(CPU_HT);
1070 }
1071
1072 if (is_intel()) { // Intel cpus specific settings
1073 if (is_knights_family()) {
1074 _features.clear_feature(CPU_VZEROUPPER);
1075 _features.clear_feature(CPU_AVX512BW);
1076 _features.clear_feature(CPU_AVX512VL);
1077 _features.clear_feature(CPU_APX_F);
1078 _features.clear_feature(CPU_AVX512DQ);
1079 _features.clear_feature(CPU_AVX512_VNNI);
1080 _features.clear_feature(CPU_AVX512_VAES);
1081 _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1082 _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1083 _features.clear_feature(CPU_AVX512_VBMI);
1084 _features.clear_feature(CPU_AVX512_VBMI2);
1085 _features.clear_feature(CPU_CLWB);
1086 _features.clear_feature(CPU_FLUSHOPT);
1087 _features.clear_feature(CPU_GFNI);
1088 _features.clear_feature(CPU_AVX512_BITALG);
1089 _features.clear_feature(CPU_AVX512_IFMA);
1090 _features.clear_feature(CPU_AVX_IFMA);
1091 _features.clear_feature(CPU_AVX512_FP16);
1092 _features.clear_feature(CPU_AVX10_1);
1093 _features.clear_feature(CPU_AVX10_2);
1094 }
1095 }
1096
1097 // Currently APX support is only enabled for targets supporting AVX512VL feature.
1098 bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1099 if (UseAPX && !apx_supported) {
1100 warning("UseAPX is not supported on this CPU, setting it to false");
1101 FLAG_SET_DEFAULT(UseAPX, false);
1102 }
1103
1104 if (!UseAPX) {
1105 _features.clear_feature(CPU_APX_F);
1106 }
1107
  // Intel JCC erratum mitigation: by default, detect affected CPUs and record
  // the result ergonomically; an explicit flag setting overrides detection.
  if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
    _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
    FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
  } else {
    // User decided explicitly; trust the flag rather than the detection.
    _has_intel_jcc_erratum = IntelJccErratumMitigation;
  }
1114
1115 assert(supports_clflush(), "Always present");
1116 if (X86ICacheSync == -1) {
1117 // Auto-detect, choosing the best performant one that still flushes
1118 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1119 if (supports_clwb()) {
1120 FLAG_SET_ERGO(X86ICacheSync, 3);
1121 } else if (supports_clflushopt()) {
1122 FLAG_SET_ERGO(X86ICacheSync, 2);
1123 } else {
1124 FLAG_SET_ERGO(X86ICacheSync, 1);
1125 }
1126 } else {
1127 if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1128 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1129 }
1130 if ((X86ICacheSync == 3) && !supports_clwb()) {
1131 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1132 }
1133 if ((X86ICacheSync == 5) && !supports_serialize()) {
1134 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1135 }
1136 }
1137
  // Assemble the human-readable CPU description, e.g.
  // "(8 cores per cpu, 2 threads per core) family 6 model ... , cx8, cmov, ...".
  stringStream ss(2048);
  if (supports_hybrid()) {
    // Hybrid (P-core/E-core) parts have no single cores-per-cpu count.
    ss.print("(hybrid)");
  } else {
    ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
  }
  ss.print(" family %d model %d stepping %d microcode 0x%x",
           cpu_family(), _model, _stepping, os::cpu_microcode_revision());
  ss.print(", ");
  // Remember where the feature list starts so _features_string can point
  // directly into the middle of the same buffer (no second allocation).
  int features_offset = (int)ss.size();
  insert_features_names(_features, ss);

  // NOTE(review): as_string(true) presumably returns a durable (C-heap) copy
  // that outlives the local stream — confirm against stringStream's contract.
  _cpu_info_string = ss.as_string(true);
  _features_string = _cpu_info_string + features_offset;
1152
  // Use AES instructions if available.
  // Three-level ergonomics: UseAES (instructions) gates UseAESIntrinsics
  // (stubs), which in turn gates UseAESCTRIntrinsics. Warnings are only
  // issued when the user explicitly set a flag that cannot be honored.
  if (supports_aes()) {
    if (FLAG_IS_DEFAULT(UseAES)) {
      FLAG_SET_DEFAULT(UseAES, true);
    }
    if (!UseAES) {
      // User disabled AES: the dependent intrinsics must go too.
      if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
        warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
        warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
      }
      FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
    } else {
      if (UseSSE > 2) {
        if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          FLAG_SET_DEFAULT(UseAESIntrinsics, true);
        }
      } else {
        // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions they use.
        if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
          warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESIntrinsics, false);
      }

      // --AES-CTR begins--
      if (!UseAESIntrinsics) {
        if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
          warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
        }
        FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
      } else {
        if (supports_sse4_1()) {
          if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
          }
        } else {
          // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions they use.
          if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
            warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
          }
          FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
        }
      }
      // --AES-CTR ends--
    }
  } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
    // CPU has no AES at all: force every AES-related flag off, warning only
    // for flags the user set explicitly.
    if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
      warning("AES instructions are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseAES, false);
    if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
      warning("AES intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
    if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
      warning("AES-CTR intrinsics are not available on this CPU");
    }
    FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
  }
1217
1218 // Use CLMUL instructions if available.
1219 if (supports_clmul()) {
1220 if (FLAG_IS_DEFAULT(UseCLMUL)) {
1221 UseCLMUL = true;
1222 }
1223 } else if (UseCLMUL) {
1224 if (!FLAG_IS_DEFAULT(UseCLMUL))
1225 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1226 FLAG_SET_DEFAULT(UseCLMUL, false);
1227 }
1228
1229 if (UseCLMUL && (UseSSE > 2)) {
1230 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1231 UseCRC32Intrinsics = true;
1232 }
1233 } else if (UseCRC32Intrinsics) {
1234 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1235 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1236 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1237 }
1238
1239 if (supports_avx2()) {
1240 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1241 UseAdler32Intrinsics = true;
1242 }
1243 } else if (UseAdler32Intrinsics) {
1244 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1245 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1246 }
1247 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1248 }
1249
1250 if (supports_sse4_2() && supports_clmul()) {
1251 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1252 UseCRC32CIntrinsics = true;
1253 }
1254 } else if (UseCRC32CIntrinsics) {
1255 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1256 warning("CRC32C intrinsics are not available on this CPU");
1257 }
1258 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1259 }
1260
1261 // GHASH/GCM intrinsics
1262 if (UseCLMUL && (UseSSE > 2)) {
1263 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1264 UseGHASHIntrinsics = true;
1265 }
1266 } else if (UseGHASHIntrinsics) {
1267 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1268 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1269 }
1270 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1271 }
1272
1273 // ChaCha20 Intrinsics
1274 // As long as the system supports AVX as a baseline we can do a
1275 // SIMD-enabled block function. StubGenerator makes the determination
1276 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1277 // version.
1278 if (UseAVX >= 1) {
1279 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1280 UseChaCha20Intrinsics = true;
1281 }
1282 } else if (UseChaCha20Intrinsics) {
1283 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1284 warning("ChaCha20 intrinsic requires AVX instructions");
1285 }
1286 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1287 }
1288
1289 // Kyber Intrinsics
1290 // Currently we only have them for AVX512
1291 if (supports_evex() && supports_avx512bw()) {
1292 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1293 UseKyberIntrinsics = true;
1294 }
1295 } else if (UseKyberIntrinsics) {
1296 if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1297 warning("Intrinsics for ML-KEM are not available on this CPU.");
1298 }
1299 FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1300 }
1301
1302 // Dilithium Intrinsics
1303 if (UseAVX > 1) {
1304 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1305 UseDilithiumIntrinsics = true;
1306 }
1307 } else if (UseDilithiumIntrinsics) {
1308 if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1309 warning("Intrinsics for ML-DSA are not available on this CPU.");
1310 }
1311 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1312 }
1313
1314 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1315 if (UseAVX >= 2) {
1316 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1317 UseBASE64Intrinsics = true;
1318 }
1319 } else if (UseBASE64Intrinsics) {
1320 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1321 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1322 }
1323 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1324 }
1325
1326 if (supports_fma()) {
1327 if (FLAG_IS_DEFAULT(UseFMA)) {
1328 UseFMA = true;
1329 }
1330 } else if (UseFMA) {
1331 if (!FLAG_IS_DEFAULT(UseFMA)) {
1332 warning("FMA instructions are not available on this CPU");
1333 }
1334 FLAG_SET_DEFAULT(UseFMA, false);
1335 }
1336
1337 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1338 UseMD5Intrinsics = true;
1339 }
1340
1341 if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1342 if (FLAG_IS_DEFAULT(UseSHA)) {
1343 UseSHA = true;
1344 }
1345 } else if (UseSHA) {
1346 if (!FLAG_IS_DEFAULT(UseSHA)) {
1347 warning("SHA instructions are not available on this CPU");
1348 }
1349 FLAG_SET_DEFAULT(UseSHA, false);
1350 }
1351
1352 if (supports_sha() && supports_sse4_1() && UseSHA) {
1353 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1354 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1355 }
1356 } else if (UseSHA1Intrinsics) {
1357 if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1358 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1359 }
1360 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1361 }
1362
1363 if (supports_sse4_1() && UseSHA) {
1364 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1365 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1366 }
1367 } else if (UseSHA256Intrinsics) {
1368 if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1369 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1370 }
1371 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1372 }
1373
1374 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1375 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1376 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1377 }
1378 } else if (UseSHA512Intrinsics) {
1379 if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1380 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1381 }
1382 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1383 }
1384
1385 if (UseSHA && supports_evex() && supports_avx512bw()) {
1386 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1387 FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1388 }
1389 } else if (UseSHA3Intrinsics) {
1390 if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1391 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1392 }
1393 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1394 }
1395
1396 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics || UseSHA3Intrinsics)) {
1397 FLAG_SET_DEFAULT(UseSHA, false);
1398 }
1399
1400 #if COMPILER2_OR_JVMCI
1401 int max_vector_size = 0;
1402 if (UseAVX == 0 || !os_supports_avx_vectors()) {
1403 // 16 byte vectors (in XMM) are supported with SSE2+
1404 max_vector_size = 16;
1405 } else if (UseAVX == 1 || UseAVX == 2) {
1406 // 32 bytes vectors (in YMM) are only supported with AVX+
1407 max_vector_size = 32;
1408 } else if (UseAVX > 2) {
1409 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1410 max_vector_size = 64;
1411 }
1412
1413 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1414
1415 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1416 if (MaxVectorSize < min_vector_size) {
1417 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1418 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1419 }
1420 if (MaxVectorSize > max_vector_size) {
1421 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1422 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1423 }
1424 if (!is_power_of_2(MaxVectorSize)) {
1425 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1426 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1427 }
1428 } else {
1429 // If default, use highest supported configuration
1430 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1431 }
1432
1433 #if defined(COMPILER2) && defined(ASSERT)
1434 if (MaxVectorSize > 0) {
1435 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1436 tty->print_cr("State of YMM registers after signal handle:");
1437 int nreg = 4;
1438 const char* ymm_name[4] = {"0", "7", "8", "15"};
1439 for (int i = 0; i < nreg; i++) {
1440 tty->print("YMM%s:", ymm_name[i]);
1441 for (int j = 7; j >=0; j--) {
1442 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1443 }
1444 tty->cr();
1445 }
1446 }
1447 }
1448 #endif // COMPILER2 && ASSERT
1449
1450 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1451 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1452 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1453 }
1454 } else if (UsePoly1305Intrinsics) {
1455 if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1456 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1457 }
1458 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1459 }
1460
1461 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1462 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1463 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1464 }
1465 } else if (UseIntPolyIntrinsics) {
1466 if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1467 warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1468 }
1469 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1470 }
1471
1472 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1473 UseMultiplyToLenIntrinsic = true;
1474 }
1475 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1476 UseSquareToLenIntrinsic = true;
1477 }
1478 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1479 UseMulAddIntrinsic = true;
1480 }
1481 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1482 UseMontgomeryMultiplyIntrinsic = true;
1483 }
1484 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1485 UseMontgomerySquareIntrinsic = true;
1486 }
1487 #endif // COMPILER2_OR_JVMCI
1488
1489 // On new cpus instructions which update whole XMM register should be used
1490 // to prevent partial register stall due to dependencies on high half.
1491 //
1492 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1493 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1494 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1495 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1496
1497
1498 if (is_zx()) { // ZX cpus specific settings
1499 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1500 UseStoreImmI16 = false; // don't use it on ZX cpus
1501 }
1502 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1503 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1504 // Use it on all ZX cpus
1505 UseAddressNop = true;
1506 }
1507 }
1508 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1509 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1510 }
1511 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1512 if (supports_sse3()) {
1513 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1514 } else {
1515 UseXmmRegToRegMoveAll = false;
1516 }
1517 }
1518 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1519 #ifdef COMPILER2
1520 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1521 // For new ZX cpus do the next optimization:
1522 // don't align the beginning of a loop if there are enough instructions
1523 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1524 // in current fetch line (OptoLoopAlignment) or the padding
1525 // is big (> MaxLoopPad).
1526 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1527 // generated NOP instructions. 11 is the largest size of one
1528 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1529 MaxLoopPad = 11;
1530 }
1531 #endif // COMPILER2
1532 if (supports_sse4_2()) { // new ZX cpus
1533 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1534 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1535 }
1536 }
1537 }
1538
1539 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1540 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1541 }
1542 }
1543
1544 if (is_amd_family()) { // AMD cpus specific settings
1545 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1546 // Use it on new AMD cpus starting from Opteron.
1547 UseAddressNop = true;
1548 }
1549 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1550 if (supports_sse4a()) {
1551 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1552 } else {
1553 UseXmmLoadAndClearUpper = false;
1554 }
1555 }
1556 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1557 if (supports_sse4a()) {
1558 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1559 } else {
1560 UseXmmRegToRegMoveAll = false;
1561 }
1562 }
1563 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1564 if (supports_sse4a()) {
1565 UseXmmI2F = true;
1566 } else {
1567 UseXmmI2F = false;
1568 }
1569 }
1570 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1571 if (supports_sse4a()) {
1572 UseXmmI2D = true;
1573 } else {
1574 UseXmmI2D = false;
1575 }
1576 }
1577
1578 // some defaults for AMD family 15h
1579 if (cpu_family() == 0x15) {
1580 // On family 15h processors default is no sw prefetch
1581 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1582 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1583 }
1584 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1585 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1586 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1587 }
1588 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1589 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1590 }
1591 }
1592
1593 #ifdef COMPILER2
1594 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1595 // Limit vectors size to 16 bytes on AMD cpus < 17h.
1596 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1597 }
1598 #endif // COMPILER2
1599
1600 // Some defaults for AMD family >= 17h && Hygon family 18h
1601 if (cpu_family() >= 0x17) {
1602 // On family >=17h processors use XMM and UnalignedLoadStores
1603 // for Array Copy
1604 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1605 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1606 }
1607 #ifdef COMPILER2
1608 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1609 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1610 }
1611 #endif
1612 }
1613 }
1614
1615 if (is_intel()) { // Intel cpus specific settings
1616 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1617 UseStoreImmI16 = false; // don't use it on Intel cpus
1618 }
1619 if (is_intel_server_family() || cpu_family() == 15) {
1620 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1621 // Use it on all Intel cpus starting from PentiumPro
1622 UseAddressNop = true;
1623 }
1624 }
1625 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1626 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1627 }
1628 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1629 if (supports_sse3()) {
1630 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1631 } else {
1632 UseXmmRegToRegMoveAll = false;
1633 }
1634 }
1635 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1636 #ifdef COMPILER2
1637 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1638 // For new Intel cpus do the next optimization:
1639 // don't align the beginning of a loop if there are enough instructions
1640 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1641 // in current fetch line (OptoLoopAlignment) or the padding
1642 // is big (> MaxLoopPad).
1643 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1644 // generated NOP instructions. 11 is the largest size of one
1645 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1646 MaxLoopPad = 11;
1647 }
1648 #endif // COMPILER2
1649
1650 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1651 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1652 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1653 }
1654 }
1655 }
1656 if (is_atom_family() || is_knights_family()) {
1657 #ifdef COMPILER2
1658 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1659 OptoScheduling = true;
1660 }
1661 #endif
1662 if (supports_sse4_2()) { // Silvermont
1663 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1664 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1665 }
1666 }
1667 if (FLAG_IS_DEFAULT(UseIncDec)) {
1668 FLAG_SET_DEFAULT(UseIncDec, false);
1669 }
1670 }
1671 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1672 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1673 }
1674 }
1675
1676 #ifdef COMPILER2
1677 if (UseAVX > 2) {
1678 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1679 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1680 ArrayOperationPartialInlineSize != 0 &&
1681 ArrayOperationPartialInlineSize != 16 &&
1682 ArrayOperationPartialInlineSize != 32 &&
1683 ArrayOperationPartialInlineSize != 64)) {
1684 int inline_size = 0;
1685 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1686 inline_size = 64;
1687 } else if (MaxVectorSize >= 32) {
1688 inline_size = 32;
1689 } else if (MaxVectorSize >= 16) {
1690 inline_size = 16;
1691 }
1692 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1693 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1694 }
1695 ArrayOperationPartialInlineSize = inline_size;
1696 }
1697
1698 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1699 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1700 if (ArrayOperationPartialInlineSize) {
1701 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1702 } else {
1703 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1704 }
1705 }
1706 }
1707
1708 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1709 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1710 OptimizeFill = false;
1711 }
1712 }
1713 #endif
1714 if (supports_sse4_2()) {
1715 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1716 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1717 }
1718 } else if (UseSSE42Intrinsics) {
1719 if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1720 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1721 }
1722 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1723 }
1724 if (UseSSE42Intrinsics) {
1725 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1726 UseVectorizedMismatchIntrinsic = true;
1727 }
1728 } else if (UseVectorizedMismatchIntrinsic) {
1729 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1730 warning("vectorizedMismatch intrinsics are not available on this CPU");
1731 }
1732 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1733 }
1734 if (UseAVX >= 2) {
1735 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1736 } else if (UseVectorizedHashCodeIntrinsic) {
1737 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1738 warning("vectorizedHashCode intrinsics are not available on this CPU");
1739 }
1740 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1741 }
1742
1743 // Use count leading zeros count instruction if available.
1744 if (supports_lzcnt()) {
1745 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1746 UseCountLeadingZerosInstruction = true;
1747 }
1748 } else if (UseCountLeadingZerosInstruction) {
1749 if (!FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1750 warning("lzcnt instruction is not available on this CPU");
1751 }
1752 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1753 }
1754
1755 // Use count trailing zeros instruction if available
1756 if (supports_bmi1()) {
1757 // tzcnt does not require VEX prefix
1758 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1759 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1760 // Don't use tzcnt if BMI1 is switched off on command line.
1761 UseCountTrailingZerosInstruction = false;
1762 } else {
1763 UseCountTrailingZerosInstruction = true;
1764 }
1765 }
1766 } else if (UseCountTrailingZerosInstruction) {
1767 if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1768 warning("tzcnt instruction is not available on this CPU");
1769 }
1770 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1771 }
1772
1773 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1774 // VEX prefix is generated only when AVX > 0.
1775 if (supports_bmi1() && supports_avx()) {
1776 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1777 UseBMI1Instructions = true;
1778 }
1779 } else if (UseBMI1Instructions) {
1780 if (!FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1781 warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1782 }
1783 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1784 }
1785
1786 if (supports_bmi2() && supports_avx()) {
1787 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1788 UseBMI2Instructions = true;
1789 }
1790 } else if (UseBMI2Instructions) {
1791 if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1792 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1793 }
1794 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1795 }
1796
1797 // Use population count instruction if available.
1798 if (supports_popcnt()) {
1799 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1800 UsePopCountInstruction = true;
1801 }
1802 } else if (UsePopCountInstruction) {
1803 if (!FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1804 warning("POPCNT instruction is not available on this CPU");
1805 }
1806 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1807 }
1808
1809 // Use fast-string operations if available.
1810 if (supports_erms()) {
1811 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1812 UseFastStosb = true;
1813 }
1814 } else if (UseFastStosb) {
1815 if (!FLAG_IS_DEFAULT(UseFastStosb)) {
1816 warning("fast-string operations are not available on this CPU");
1817 }
1818 FLAG_SET_DEFAULT(UseFastStosb, false);
1819 }
1820
1821 // For AMD Processors use XMM/YMM MOVDQU instructions
1822 // for Object Initialization as default
1823 if (is_amd() && cpu_family() >= 0x19) {
1824 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1825 UseFastStosb = false;
1826 }
1827 }
1828
1829 #ifdef COMPILER2
1830 if (is_intel() && MaxVectorSize > 16) {
1831 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1832 UseFastStosb = false;
1833 }
1834 }
1835 #endif
1836
1837 // Use XMM/YMM MOVDQU instruction for Object Initialization
1838 if (UseUnalignedLoadStores) {
1839 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1840 UseXMMForObjInit = true;
1841 }
1842 } else if (UseXMMForObjInit) {
1843 if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1844 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1845 }
1846 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1847 }
1848
1849 #ifdef COMPILER2
1850 if (FLAG_IS_DEFAULT(AlignVector)) {
1851 // Modern processors allow misaligned memory operations for vectors.
1852 AlignVector = !UseUnalignedLoadStores;
1853 }
1854 #endif // COMPILER2
1855
1856 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1857 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1858 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1859 } else if (!supports_sse() && supports_3dnow_prefetch()) {
1860 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1861 }
1862 }
1863
1864 // Allocation prefetch settings
1865 int cache_line_size = checked_cast<int>(prefetch_data_size());
1866 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1867 (cache_line_size > AllocatePrefetchStepSize)) {
1868 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1869 }
1870
1871 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1872 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1873 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1874 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1875 }
1876 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1877 }
1878
1879 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1880 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1881 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1882 }
1883
1884 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1885 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1886 supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1887 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1888 }
1889 #ifdef COMPILER2
1890 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1891 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1892 }
1893 #endif
1894 }
1895
1896 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1897 #ifdef COMPILER2
1898 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1899 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1900 }
1901 #endif
1902 }
1903
1904 // Prefetch settings
1905
1906 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1907 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1908 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1909 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1910
1911 // gc copy/scan is disabled if prefetchw isn't supported, because
1912 // Prefetch::write emits an inlined prefetchw on Linux.
1913 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1914 // The used prefetcht0 instruction works for both amd64 and em64t.
1915
1916 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1917 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1918 }
1919 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1920 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1921 }
1922
1923 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1924 (cache_line_size > ContendedPaddingWidth))
1925 ContendedPaddingWidth = cache_line_size;
1926
1927 // This machine allows unaligned memory accesses
1928 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1929 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1930 }
1931
1932 #ifndef PRODUCT
1933 if (log_is_enabled(Info, os, cpu)) {
1934 LogStream ls(Log(os, cpu)::info());
1935 outputStream* log = &ls;
1936 log->print_cr("Logical CPUs per core: %u",
1937 logical_processors_per_package());
1938 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1939 log->print("UseSSE=%d", UseSSE);
1940 if (UseAVX > 0) {
1941 log->print(" UseAVX=%d", UseAVX);
1942 }
1943 if (UseAES) {
1944 log->print(" UseAES=1");
1945 }
1946 #ifdef COMPILER2
1947 if (MaxVectorSize > 0) {
1948 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1949 }
1950 #endif
1951 log->cr();
1952 log->print("Allocation");
1953 if (AllocatePrefetchStyle <= 0) {
1954 log->print_cr(": no prefetching");
1955 } else {
1956 log->print(" prefetching: ");
1957 if (AllocatePrefetchInstr == 0) {
1958 log->print("PREFETCHNTA");
1959 } else if (AllocatePrefetchInstr == 1) {
1960 log->print("PREFETCHT0");
1961 } else if (AllocatePrefetchInstr == 2) {
1962 log->print("PREFETCHT2");
1963 } else if (AllocatePrefetchInstr == 3) {
1964 log->print("PREFETCHW");
1965 }
1966 if (AllocatePrefetchLines > 1) {
1967 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1968 } else {
1969 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1970 }
1971 }
1972
1973 if (PrefetchCopyIntervalInBytes > 0) {
1974 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1975 }
1976 if (PrefetchScanIntervalInBytes > 0) {
1977 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1978 }
1979 if (ContendedPaddingWidth > 0) {
1980 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1981 }
1982 }
1983 #endif // !PRODUCT
1984 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1985 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1986 }
1987 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1988 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1989 }
1990 // CopyAVX3Threshold is the threshold at which 64-byte instructions are used
1991 // for implementing the array copy and clear operations.
1992 // The Intel platforms that supports the serialize instruction
1993 // have improved implementation of 64-byte load/stores and so the default
1994 // threshold is set to 0 for these platforms.
1995 if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
1996 if (is_intel() && is_intel_server_family() && supports_serialize()) {
1997 FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1998 } else {
1999 FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
2000 }
2001 }
2002 }
2003
2004 void VM_Version::print_platform_virtualization_info(outputStream* st) {
2005 VirtualizationType vrt = VM_Version::get_detected_virtualization();
2006 if (vrt == XenHVM) {
2007 st->print_cr("Xen hardware-assisted virtualization detected");
2008 } else if (vrt == KVM) {
2009 st->print_cr("KVM virtualization detected");
2010 } else if (vrt == VMWare) {
2011 st->print_cr("VMWare virtualization detected");
2012 VirtualizationSupport::print_virtualization_info(st);
2013 } else if (vrt == HyperV) {
2014 st->print_cr("Hyper-V virtualization detected");
2015 } else if (vrt == HyperVRole) {
2016 st->print_cr("Hyper-V role detected");
2017 }
2018 }
2019
2020 bool VM_Version::compute_has_intel_jcc_erratum() {
2021 if (!is_intel_family_core()) {
2022 // Only Intel CPUs are affected.
2023 return false;
2024 }
2025 // The following table of affected CPUs is based on the following document released by Intel:
2026 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
2027 switch (_model) {
2028 case 0x8E:
2029 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2030 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
2031 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
2032 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
2033 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
2034 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2035 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
2036 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
2037 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
2038 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
2039 case 0x4E:
2040 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
2041 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
2042 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
2043 return _stepping == 0x3;
2044 case 0x55:
2045 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
2046 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
2047 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
2048 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
2049 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
2050 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
2051 return _stepping == 0x4 || _stepping == 0x7;
2052 case 0x5E:
2053 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
2054 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2055 return _stepping == 0x3;
2056 case 0x9E:
2057 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2058 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2059 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2060 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2061 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2062 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2063 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2064 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2065 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2066 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2067 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2068 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2069 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2070 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2071 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2072 case 0xA5:
2073 // Not in Intel documentation.
2074 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2075 return true;
2076 case 0xA6:
2077 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2078 return _stepping == 0x0;
2079 case 0xAE:
2080 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2081 return _stepping == 0xA;
2082 default:
2083 // If we are running on another intel machine not recognized in the table, we are okay.
2084 return false;
2085 }
2086 }
2087
2088 // On Xen, the cpuid instruction returns
2089 // eax / registers[0]: Version of Xen
2090 // ebx / registers[1]: chars 'XenV'
2091 // ecx / registers[2]: chars 'MMXe'
2092 // edx / registers[3]: chars 'nVMM'
2093 //
2094 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2095 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2096 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2097 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
2098 //
2099 // more information :
2100 // https://kb.vmware.com/s/article/1009458
2101 //
2102 void VM_Version::check_virtualizations() {
2103 uint32_t registers[4] = {0};
2104 char signature[13] = {0};
2105
2106 // Xen cpuid leaves can be found 0x100 aligned boundary starting
2107 // from 0x40000000 until 0x40010000.
2108 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2109 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2110 detect_virt_stub(leaf, registers);
2111 memcpy(signature, ®isters[1], 12);
2112
2113 if (strncmp("VMwareVMware", signature, 12) == 0) {
2114 Abstract_VM_Version::_detected_virtualization = VMWare;
2115 // check for extended metrics from guestlib
2116 VirtualizationSupport::initialize();
2117 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2118 Abstract_VM_Version::_detected_virtualization = HyperV;
2119 #ifdef _WINDOWS
2120 // CPUID leaf 0x40000007 is available to the root partition only.
2121 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2122 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2123 detect_virt_stub(0x40000007, registers);
2124 if ((registers[0] != 0x0) ||
2125 (registers[1] != 0x0) ||
2126 (registers[2] != 0x0) ||
2127 (registers[3] != 0x0)) {
2128 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2129 }
2130 #endif
2131 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2132 Abstract_VM_Version::_detected_virtualization = KVM;
2133 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2134 Abstract_VM_Version::_detected_virtualization = XenHVM;
2135 }
2136 }
2137 }
2138
#ifdef COMPILER2
// Determine if we are running on Cascade Lake with both UseAVX and
// MaxVectorSize left at their defaults (i.e. the user has not tuned them).
bool VM_Version::is_default_intel_cascade_lake() {
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  return UseAVX > 2 && is_intel_cascade_lake();
}
#endif
2148
2149 bool VM_Version::is_intel_cascade_lake() {
2150 return is_intel_skylake() && _stepping >= 5;
2151 }
2152
2153 bool VM_Version::is_intel_darkmont() {
2154 return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2155 }
2156
// Thin wrapper that invokes the generated stub which resets the APX test
// state (the stub itself is produced by VM_Version_StubGenerator during
// initialize()).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2160
2161 static bool _vm_version_initialized = false;
2162
2163 void VM_Version::initialize() {
2164 ResourceMark rm;
2165
2166 // Making this stub must be FIRST use of assembler
2167 stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2168 if (stub_blob == nullptr) {
2169 vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2170 }
2171 CodeBuffer c(stub_blob);
2172 VM_Version_StubGenerator g(&c);
2173
2174 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2175 g.generate_get_cpu_info());
2176 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2177 g.generate_detect_virt());
2178 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2179 g.clear_apx_test_state());
2180 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2181 g.generate_getCPUIDBrandString());
2182 get_processor_features();
2183
2184 Assembler::precompute_instructions();
2185
2186 if (VM_Version::supports_hv()) { // Supports hypervisor
2187 check_virtualizations();
2188 }
2189 _vm_version_initialized = true;
2190 }
2191
// x86 family ids as used by cpu_family()/extended_cpu_family() comparisons
// below. Family 6 (PentiumPro) covers many distinct models; those are
// resolved via _model_id_pentium_pro.
typedef enum {
  CPU_FAMILY_8086_8088 = 0,
  CPU_FAMILY_INTEL_286 = 2,
  CPU_FAMILY_INTEL_386 = 3,
  CPU_FAMILY_INTEL_486 = 4,
  CPU_FAMILY_PENTIUM = 5,
  CPU_FAMILY_PENTIUMPRO = 6, // Same family several models
  CPU_FAMILY_PENTIUM_4 = 0xF
} FamilyFlag;
2201
// Feature bits read from _cpuid_info.ext_cpuid1_edx (extended CPUID EDX).
typedef enum {
  RDTSCP_FLAG = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000 // bit 29
} _featureExtendedEdxFlag;
2206
// Feature bits read from _cpuid_info.std_cpuid1_edx (CPUID leaf 1, EDX).
// Each constant is the single-bit mask for that feature; reserved bits
// (e.g. bit 10, bit 20) have no constant. SEP_FLAG and HTT_FLAG get special
// handling in cpu_write_support_string() below.
typedef enum {
  FPU_FLAG = 0x00000001,
  VME_FLAG = 0x00000002,
  DE_FLAG = 0x00000004,
  PSE_FLAG = 0x00000008,
  TSC_FLAG = 0x00000010,
  MSR_FLAG = 0x00000020,
  PAE_FLAG = 0x00000040,
  MCE_FLAG = 0x00000080,
  CX8_FLAG = 0x00000100,
  APIC_FLAG = 0x00000200,
  SEP_FLAG = 0x00000800,
  MTRR_FLAG = 0x00001000,
  PGE_FLAG = 0x00002000,
  MCA_FLAG = 0x00004000,
  CMOV_FLAG = 0x00008000,
  PAT_FLAG = 0x00010000,
  PSE36_FLAG = 0x00020000,
  PSNUM_FLAG = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG = 0x00200000,
  ACPI_FLAG = 0x00400000,
  MMX_FLAG = 0x00800000,
  FXSR_FLAG = 0x01000000,
  SSE_FLAG = 0x02000000,
  SSE2_FLAG = 0x04000000,
  SS_FLAG = 0x08000000,
  HTT_FLAG = 0x10000000,
  TM_FLAG = 0x20000000
} FeatureEdxFlag;
2237
// VM_Version statics
enum {
  // Sizes of the per-vendor family-id name tables below.
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD = 24
};

// 12 vendor-id characters plus a terminating NUL.
const size_t VENDOR_LENGTH = 13;
// Extended brand string: 3 CPUID leaves x 4 registers x 4 bytes, plus NUL.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Lazily-allocated cache for cpu_brand_string(); stays null on failure.
static char* _cpu_brand_string = nullptr;
// 0 until a frequency has been determined (set outside this excerpt --
// presumably parsed from the brand string; confirm against the full file).
static int64_t _max_qualified_cpu_frequency = 0;

// Rough topology estimates filled in by resolve_cpu_information_details().
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2251
// Intel family names indexed by extended family id (see FamilyFlag).
// Family 6 entries are resolved through _model_id_pentium_pro instead;
// empty strings mark families with no distinct name.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2270
// AMD family names indexed by extended family id; empty strings mark
// families with no distinct name.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
//
// Model names for family 6 (PentiumPro) parts, indexed by extended model id.
// The trailing nullptr is a sentinel: cpu_model_description() stops walking
// the table when it reaches it, so out-of-range models yield nullptr.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2373
/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string
 * Indexed by the brand id in CPUID leaf 1, EBX bits 7:0; the trailing
 * nullptr is a sentinel that stops the walk in cpu_brand(). */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2388
2389
// Human-readable names for _cpuid_info.std_cpuid1_edx feature bits, indexed
// by bit number; empty strings mark reserved bits and are skipped when
// building the support string.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"  // bit 31; note cpu_write_support_string() only scans bits 0-29
};
2424
// Names for _cpuid_info.ext_cpuid1_edx feature bits, indexed by bit number;
// empty strings mark bits that are not reported.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2459
// Names for _cpuid_info.std_cpuid1_ecx feature bits, indexed by bit number;
// empty strings mark bits that are not reported.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2494
// Names for _cpuid_info.ext_cpuid1_ecx feature bits, indexed by bit number;
// empty strings mark bits that are not reported.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2529
2530 const char* VM_Version::cpu_model_description(void) {
2531 uint32_t cpu_family = extended_cpu_family();
2532 uint32_t cpu_model = extended_cpu_model();
2533 const char* model = nullptr;
2534
2535 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2536 for (uint32_t i = 0; i <= cpu_model; i++) {
2537 model = _model_id_pentium_pro[i];
2538 if (model == nullptr) {
2539 break;
2540 }
2541 }
2542 }
2543 return model;
2544 }
2545
2546 const char* VM_Version::cpu_brand_string(void) {
2547 if (_cpu_brand_string == nullptr) {
2548 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2549 if (nullptr == _cpu_brand_string) {
2550 return nullptr;
2551 }
2552 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2553 if (ret_val != OS_OK) {
2554 FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2555 _cpu_brand_string = nullptr;
2556 }
2557 }
2558 return _cpu_brand_string;
2559 }
2560
2561 const char* VM_Version::cpu_brand(void) {
2562 const char* brand = nullptr;
2563
2564 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2565 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2566 brand = _brand_id[0];
2567 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2568 brand = _brand_id[i];
2569 }
2570 }
2571 return brand;
2572 }
2573
2574 bool VM_Version::cpu_is_em64t(void) {
2575 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2576 }
2577
2578 bool VM_Version::is_netburst(void) {
2579 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2580 }
2581
2582 bool VM_Version::supports_tscinv_ext(void) {
2583 if (!supports_tscinv_bit()) {
2584 return false;
2585 }
2586
2587 if (is_intel()) {
2588 return true;
2589 }
2590
2591 if (is_amd()) {
2592 return !is_amd_Barcelona();
2593 }
2594
2595 if (is_hygon()) {
2596 return true;
2597 }
2598
2599 return false;
2600 }
2601
2602 void VM_Version::resolve_cpu_information_details(void) {
2603
2604 // in future we want to base this information on proper cpu
2605 // and cache topology enumeration such as:
2606 // Intel 64 Architecture Processor Topology Enumeration
2607 // which supports system cpu and cache topology enumeration
2608 // either using 2xAPICIDs or initial APICIDs
2609
2610 // currently only rough cpu information estimates
2611 // which will not necessarily reflect the exact configuration of the system
2612
2613 // this is the number of logical hardware threads
2614 // visible to the operating system
2615 _no_of_threads = os::processor_count();
2616
2617 // find out number of threads per cpu package
2618 int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2619 if (threads_per_package == 0) {
2620 // Fallback code to avoid div by zero in subsequent code.
2621 // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2622 threads_per_package = threads_per_core() * cores_per_cpu();
2623 }
2624
2625 // use amount of threads visible to the process in order to guess number of sockets
2626 _no_of_sockets = _no_of_threads / threads_per_package;
2627
2628 // process might only see a subset of the total number of threads
2629 // from a single processor package. Virtualization/resource management for example.
2630 // If so then just write a hard 1 as num of pkgs.
2631 if (0 == _no_of_sockets) {
2632 _no_of_sockets = 1;
2633 }
2634
2635 // estimate the number of cores
2636 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2637 }
2638
2639
2640 const char* VM_Version::cpu_family_description(void) {
2641 int cpu_family_id = extended_cpu_family();
2642 if (is_amd()) {
2643 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2644 return _family_id_amd[cpu_family_id];
2645 }
2646 }
2647 if (is_intel()) {
2648 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2649 return cpu_model_description();
2650 }
2651 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2652 return _family_id_intel[cpu_family_id];
2653 }
2654 }
2655 if (is_zx()) {
2656 int cpu_model_id = extended_cpu_model();
2657 if (cpu_family_id == 7) {
2658 switch (cpu_model_id) {
2659 case 0x1B:
2660 return "wudaokou";
2661 case 0x3B:
2662 return "lujiazui";
2663 case 0x5B:
2664 return "yongfeng";
2665 case 0x6B:
2666 return "shijidadao";
2667 }
2668 } else if (cpu_family_id == 6) {
2669 return "zhangjiang";
2670 }
2671 }
2672 if (is_hygon()) {
2673 return "Dhyana";
2674 }
2675 return "Unknown x86";
2676 }
2677
2678 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2679 assert(buf != nullptr, "buffer is null!");
2680 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2681
2682 const char* cpu_type = nullptr;
2683 const char* x64 = nullptr;
2684
2685 if (is_intel()) {
2686 cpu_type = "Intel";
2687 x64 = cpu_is_em64t() ? " Intel64" : "";
2688 } else if (is_amd()) {
2689 cpu_type = "AMD";
2690 x64 = cpu_is_em64t() ? " AMD64" : "";
2691 } else if (is_zx()) {
2692 cpu_type = "Zhaoxin";
2693 x64 = cpu_is_em64t() ? " x86_64" : "";
2694 } else if (is_hygon()) {
2695 cpu_type = "Hygon";
2696 x64 = cpu_is_em64t() ? " AMD64" : "";
2697 } else {
2698 cpu_type = "Unknown x86";
2699 x64 = cpu_is_em64t() ? " x86_64" : "";
2700 }
2701
2702 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2703 cpu_type,
2704 cpu_family_description(),
2705 supports_ht() ? " (HT)" : "",
2706 supports_sse3() ? " SSE3" : "",
2707 supports_ssse3() ? " SSSE3" : "",
2708 supports_sse4_1() ? " SSE4.1" : "",
2709 supports_sse4_2() ? " SSE4.2" : "",
2710 supports_sse4a() ? " SSE4A" : "",
2711 is_netburst() ? " Netburst" : "",
2712 is_intel_family_core() ? " Core" : "",
2713 x64);
2714
2715 return OS_OK;
2716 }
2717
2718 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2719 assert(buf != nullptr, "buffer is null!");
2720 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2721 assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2722
2723 // invoke newly generated asm code to fetch CPU Brand String
2724 getCPUIDBrandString_stub(&_cpuid_info);
2725
2726 // fetch results into buffer
2727 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0;
2728 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1;
2729 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2;
2730 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2731 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2732 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2733 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2734 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2735 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2736 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2737 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2738 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2739
2740 return OS_OK;
2741 }
2742
// Write a comma-separated list of supported CPU feature names into buf,
// scanning the four CPUID feature registers against the name tables above.
// Returns the number of characters written (or buf_len - 1 on truncation).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t written = 0;
  const char* prefix = "";

// Append `string` to buf, preceded by ", " for every entry after the first.
// On snprintf failure the function bails out reporting a full buffer.
#define WRITE_TO_BUF(string)                                              \
  {                                                                       \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                        \
      return buf_len - 1;                                                 \
    }                                                                     \
    written += res;                                                       \
    if (prefix[0] == '\0') {                                              \
      prefix = ", ";                                                      \
    }                                                                     \
  }

  // CPUID leaf 1 EDX features. Note: the loop bound (flag <= 0x20000000)
  // covers bits 0-29 only, so bits 30-31 (e.g. "Pending Break Enable")
  // are never emitted.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call -- presumably the early-P6 SEP erratum; confirm */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID leaf 1 ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Extended CPUID ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Extended CPUID EDX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  if (supports_hybrid()) {
    WRITE_TO_BUF("Hybrid Architecture");
  }

  return written;
}
2803
2804 /**
2805 * Write a detailed description of the cpu to a given buffer, including
2806 * feature set.
2807 */
2808 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2809 assert(buf != nullptr, "buffer is null!");
2810 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2811
2812 static const char* unknown = "<unknown>";
2813 char vendor_id[VENDOR_LENGTH];
2814 const char* family = nullptr;
2815 const char* model = nullptr;
2816 const char* brand = nullptr;
2817 int outputLen = 0;
2818
2819 family = cpu_family_description();
2820 if (family == nullptr) {
2821 family = unknown;
2822 }
2823
2824 model = cpu_model_description();
2825 if (model == nullptr) {
2826 model = unknown;
2827 }
2828
2829 brand = cpu_brand_string();
2830
2831 if (brand == nullptr) {
2832 brand = cpu_brand();
2833 if (brand == nullptr) {
2834 brand = unknown;
2835 }
2836 }
2837
2838 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2839 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2840 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2841 vendor_id[VENDOR_LENGTH-1] = '\0';
2842
2843 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2844 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2845 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2846 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2847 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2848 "Supports: ",
2849 brand,
2850 vendor_id,
2851 family,
2852 extended_cpu_family(),
2853 model,
2854 extended_cpu_model(),
2855 cpu_stepping(),
2856 _cpuid_info.std_cpuid1_eax.bits.ext_family,
2857 _cpuid_info.std_cpuid1_eax.bits.ext_model,
2858 _cpuid_info.std_cpuid1_eax.bits.proc_type,
2859 _cpuid_info.std_cpuid1_eax.value,
2860 _cpuid_info.std_cpuid1_ebx.value,
2861 _cpuid_info.std_cpuid1_ecx.value,
2862 _cpuid_info.std_cpuid1_edx.value,
2863 _cpuid_info.ext_cpuid1_eax,
2864 _cpuid_info.ext_cpuid1_ebx,
2865 _cpuid_info.ext_cpuid1_ecx,
2866 _cpuid_info.ext_cpuid1_edx);
2867
2868 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2869 if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2870 return OS_ERR;
2871 }
2872
2873 cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2874
2875 return OS_OK;
2876 }
2877
2878
// Fill in Abstract_VM_Version statics
// One-time initialization of _cpu_name and _cpu_desc; must run after
// VM_Version initialization (the CPUID data must already be captured).
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2890
2891 /**
2892 * For information about extracting the frequency from the cpu brand string, please see:
2893 *
2894 * Intel Processor Identification and the CPUID Instruction
2895 * Application Note 485
2896 * May 2012
2897 *
2898 * The return value is the frequency in Hz.
2899 */
2900 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2901 const char* const brand_string = cpu_brand_string();
2902 if (brand_string == nullptr) {
2903 return 0;
2904 }
2905 const int64_t MEGA = 1000000;
2906 int64_t multiplier = 0;
2907 int64_t frequency = 0;
2908 uint8_t idx = 0;
2909 // The brand string buffer is at most 48 bytes.
2910 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2911 for (; idx < 48-2; ++idx) {
2912 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2913 // Search brand string for "yHz" where y is M, G, or T.
2914 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2915 if (brand_string[idx] == 'M') {
2916 multiplier = MEGA;
2917 } else if (brand_string[idx] == 'G') {
2918 multiplier = MEGA * 1000;
2919 } else if (brand_string[idx] == 'T') {
2920 multiplier = MEGA * MEGA;
2921 }
2922 break;
2923 }
2924 }
2925 if (multiplier > 0) {
2926 // Compute frequency (in Hz) from brand string.
2927 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2928 frequency = (brand_string[idx-4] - '0') * multiplier;
2929 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2930 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2931 } else { // format is "xxxx"
2932 frequency = (brand_string[idx-4] - '0') * 1000;
2933 frequency += (brand_string[idx-3] - '0') * 100;
2934 frequency += (brand_string[idx-2] - '0') * 10;
2935 frequency += (brand_string[idx-1] - '0');
2936 frequency *= multiplier;
2937 }
2938 }
2939 return frequency;
2940 }
2941
2942
2943 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2944 if (_max_qualified_cpu_frequency == 0) {
2945 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2946 }
2947 return _max_qualified_cpu_frequency;
2948 }
2949
2950 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2951 VM_Features vm_features;
2952 if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2953 vm_features.set_feature(CPU_CX8);
2954 if (std_cpuid1_edx.bits.cmov != 0)
2955 vm_features.set_feature(CPU_CMOV);
2956 if (std_cpuid1_edx.bits.clflush != 0)
2957 vm_features.set_feature(CPU_FLUSH);
2958 // clflush should always be available on x86_64
2959 // if not we are in real trouble because we rely on it
2960 // to flush the code cache.
2961 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2962 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2963 ext_cpuid1_edx.bits.fxsr != 0))
2964 vm_features.set_feature(CPU_FXSR);
2965 // HT flag is set for multi-core processors also.
2966 if (threads_per_core() > 1)
2967 vm_features.set_feature(CPU_HT);
2968 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2969 ext_cpuid1_edx.bits.mmx != 0))
2970 vm_features.set_feature(CPU_MMX);
2971 if (std_cpuid1_edx.bits.sse != 0)
2972 vm_features.set_feature(CPU_SSE);
2973 if (std_cpuid1_edx.bits.sse2 != 0)
2974 vm_features.set_feature(CPU_SSE2);
2975 if (std_cpuid1_ecx.bits.sse3 != 0)
2976 vm_features.set_feature(CPU_SSE3);
2977 if (std_cpuid1_ecx.bits.ssse3 != 0)
2978 vm_features.set_feature(CPU_SSSE3);
2979 if (std_cpuid1_ecx.bits.sse4_1 != 0)
2980 vm_features.set_feature(CPU_SSE4_1);
2981 if (std_cpuid1_ecx.bits.sse4_2 != 0)
2982 vm_features.set_feature(CPU_SSE4_2);
2983 if (std_cpuid1_ecx.bits.popcnt != 0)
2984 vm_features.set_feature(CPU_POPCNT);
2985 if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2986 xem_xcr0_eax.bits.apx_f != 0 &&
2987 std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2988 vm_features.set_feature(CPU_APX_F);
2989 }
2990 if (std_cpuid1_ecx.bits.avx != 0 &&
2991 std_cpuid1_ecx.bits.osxsave != 0 &&
2992 xem_xcr0_eax.bits.sse != 0 &&
2993 xem_xcr0_eax.bits.ymm != 0) {
2994 vm_features.set_feature(CPU_AVX);
2995 vm_features.set_feature(CPU_VZEROUPPER);
2996 if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2997 vm_features.set_feature(CPU_SHA512);
2998 if (std_cpuid1_ecx.bits.f16c != 0)
2999 vm_features.set_feature(CPU_F16C);
3000 if (sef_cpuid7_ebx.bits.avx2 != 0) {
3001 vm_features.set_feature(CPU_AVX2);
3002 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
3003 vm_features.set_feature(CPU_AVX_IFMA);
3004 }
3005 if (sef_cpuid7_ecx.bits.gfni != 0)
3006 vm_features.set_feature(CPU_GFNI);
3007 if (sef_cpuid7_ebx.bits.avx512f != 0 &&
3008 xem_xcr0_eax.bits.opmask != 0 &&
3009 xem_xcr0_eax.bits.zmm512 != 0 &&
3010 xem_xcr0_eax.bits.zmm32 != 0) {
3011 vm_features.set_feature(CPU_AVX512F);
3012 if (sef_cpuid7_ebx.bits.avx512cd != 0)
3013 vm_features.set_feature(CPU_AVX512CD);
3014 if (sef_cpuid7_ebx.bits.avx512dq != 0)
3015 vm_features.set_feature(CPU_AVX512DQ);
3016 if (sef_cpuid7_ebx.bits.avx512ifma != 0)
3017 vm_features.set_feature(CPU_AVX512_IFMA);
3018 if (sef_cpuid7_ebx.bits.avx512pf != 0)
3019 vm_features.set_feature(CPU_AVX512PF);
3020 if (sef_cpuid7_ebx.bits.avx512er != 0)
3021 vm_features.set_feature(CPU_AVX512ER);
3022 if (sef_cpuid7_ebx.bits.avx512bw != 0)
3023 vm_features.set_feature(CPU_AVX512BW);
3024 if (sef_cpuid7_ebx.bits.avx512vl != 0)
3025 vm_features.set_feature(CPU_AVX512VL);
3026 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
3027 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3028 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
3029 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3030 if (sef_cpuid7_ecx.bits.vaes != 0)
3031 vm_features.set_feature(CPU_AVX512_VAES);
3032 if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
3033 vm_features.set_feature(CPU_AVX512_VNNI);
3034 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
3035 vm_features.set_feature(CPU_AVX512_BITALG);
3036 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
3037 vm_features.set_feature(CPU_AVX512_VBMI);
3038 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
3039 vm_features.set_feature(CPU_AVX512_VBMI2);
3040 }
3041 if (is_intel()) {
3042 if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
3043 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
3044 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
3045 xem_xcr0_eax.bits.opmask != 0 &&
3046 xem_xcr0_eax.bits.zmm512 != 0 &&
3047 xem_xcr0_eax.bits.zmm32 != 0) {
3048 vm_features.set_feature(CPU_AVX10_1);
3049 vm_features.set_feature(CPU_AVX512F);
3050 vm_features.set_feature(CPU_AVX512CD);
3051 vm_features.set_feature(CPU_AVX512DQ);
3052 vm_features.set_feature(CPU_AVX512PF);
3053 vm_features.set_feature(CPU_AVX512ER);
3054 vm_features.set_feature(CPU_AVX512BW);
3055 vm_features.set_feature(CPU_AVX512VL);
3056 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3057 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3058 vm_features.set_feature(CPU_AVX512_VAES);
3059 vm_features.set_feature(CPU_AVX512_VNNI);
3060 vm_features.set_feature(CPU_AVX512_BITALG);
3061 vm_features.set_feature(CPU_AVX512_VBMI);
3062 vm_features.set_feature(CPU_AVX512_VBMI2);
3063 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3064 vm_features.set_feature(CPU_AVX10_2);
3065 }
3066 }
3067 }
3068 }
3069
3070 if (std_cpuid1_ecx.bits.hv != 0)
3071 vm_features.set_feature(CPU_HV);
3072 if (sef_cpuid7_ebx.bits.bmi1 != 0)
3073 vm_features.set_feature(CPU_BMI1);
3074 if (std_cpuid1_edx.bits.tsc != 0)
3075 vm_features.set_feature(CPU_TSC);
3076 if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3077 vm_features.set_feature(CPU_TSCINV_BIT);
3078 if (std_cpuid1_ecx.bits.aes != 0)
3079 vm_features.set_feature(CPU_AES);
3080 if (ext_cpuid1_ecx.bits.lzcnt != 0)
3081 vm_features.set_feature(CPU_LZCNT);
3082 if (ext_cpuid1_ecx.bits.prefetchw != 0)
3083 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3084 if (sef_cpuid7_ebx.bits.erms != 0)
3085 vm_features.set_feature(CPU_ERMS);
3086 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3087 vm_features.set_feature(CPU_FSRM);
3088 if (std_cpuid1_ecx.bits.clmul != 0)
3089 vm_features.set_feature(CPU_CLMUL);
3090 if (sef_cpuid7_ebx.bits.rtm != 0)
3091 vm_features.set_feature(CPU_RTM);
3092 if (sef_cpuid7_ebx.bits.adx != 0)
3093 vm_features.set_feature(CPU_ADX);
3094 if (sef_cpuid7_ebx.bits.bmi2 != 0)
3095 vm_features.set_feature(CPU_BMI2);
3096 if (sef_cpuid7_ebx.bits.sha != 0)
3097 vm_features.set_feature(CPU_SHA);
3098 if (std_cpuid1_ecx.bits.fma != 0)
3099 vm_features.set_feature(CPU_FMA);
3100 if (sef_cpuid7_ebx.bits.clflushopt != 0)
3101 vm_features.set_feature(CPU_FLUSHOPT);
3102 if (sef_cpuid7_ebx.bits.clwb != 0)
3103 vm_features.set_feature(CPU_CLWB);
3104 if (ext_cpuid1_edx.bits.rdtscp != 0)
3105 vm_features.set_feature(CPU_RDTSCP);
3106 if (sef_cpuid7_ecx.bits.rdpid != 0)
3107 vm_features.set_feature(CPU_RDPID);
3108
3109 // AMD|Hygon additional features.
3110 if (is_amd_family()) {
3111 // PREFETCHW was checked above, check TDNOW here.
3112 if ((ext_cpuid1_edx.bits.tdnow != 0))
3113 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3114 if (ext_cpuid1_ecx.bits.sse4a != 0)
3115 vm_features.set_feature(CPU_SSE4A);
3116 }
3117
3118 // Intel additional features.
3119 if (is_intel()) {
3120 if (sef_cpuid7_edx.bits.serialize != 0)
3121 vm_features.set_feature(CPU_SERIALIZE);
3122 if (sef_cpuid7_edx.bits.hybrid != 0)
3123 vm_features.set_feature(CPU_HYBRID);
3124 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3125 vm_features.set_feature(CPU_AVX512_FP16);
3126 }
3127
3128 // ZX additional features.
3129 if (is_zx()) {
3130 // We do not know if these are supported by ZX, so we cannot trust
3131 // common CPUID bit for them.
3132 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3133 vm_features.clear_feature(CPU_CLWB);
3134 }
3135
3136 // Protection key features.
3137 if (sef_cpuid7_ecx.bits.pku != 0) {
3138 vm_features.set_feature(CPU_PKU);
3139 }
3140 if (sef_cpuid7_ecx.bits.ospke != 0) {
3141 vm_features.set_feature(CPU_OSPKE);
3142 }
3143
3144 // Control flow enforcement (CET) features.
3145 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3146 vm_features.set_feature(CPU_CET_SS);
3147 }
3148 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3149 vm_features.set_feature(CPU_CET_IBT);
3150 }
3151
3152 // Composite features.
3153 if (supports_tscinv_bit() &&
3154 ((is_amd_family() && !is_amd_Barcelona()) ||
3155 is_intel_tsc_synched_at_init())) {
3156 vm_features.set_feature(CPU_TSCINV);
3157 }
3158 return vm_features;
3159 }
3160
3161 bool VM_Version::os_supports_avx_vectors() {
3162 bool retVal = false;
3163 int nreg = 4;
3164 if (supports_evex()) {
3165 // Verify that OS save/restore all bits of EVEX registers
3166 // during signal processing.
3167 retVal = true;
3168 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3169 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3170 retVal = false;
3171 break;
3172 }
3173 }
3174 } else if (supports_avx()) {
3175 // Verify that OS save/restore all bits of AVX registers
3176 // during signal processing.
3177 retVal = true;
3178 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3179 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3180 retVal = false;
3181 break;
3182 }
3183 }
3184 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3185 if (retVal == false) {
3186 // Verify that OS save/restore all bits of EVEX registers
3187 // during signal processing.
3188 retVal = true;
3189 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3190 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3191 retVal = false;
3192 break;
3193 }
3194 }
3195 }
3196 }
3197 return retVal;
3198 }
3199
3200 bool VM_Version::os_supports_apx_egprs() {
3201 if (!supports_apx_f()) {
3202 return false;
3203 }
3204 if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3205 _cpuid_info.apx_save[1] != egpr_test_value()) {
3206 return false;
3207 }
3208 return true;
3209 }
3210
3211 uint VM_Version::cores_per_cpu() {
3212 uint result = 1;
3213 if (is_intel()) {
3214 bool supports_topology = supports_processor_topology();
3215 if (supports_topology) {
3216 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3217 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3218 }
3219 if (!supports_topology || result == 0) {
3220 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3221 }
3222 } else if (is_amd_family()) {
3223 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3224 if (cpu_family() >= 0x17) { // Zen or later
3225 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3226 }
3227 } else if (is_zx()) {
3228 bool supports_topology = supports_processor_topology();
3229 if (supports_topology) {
3230 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3231 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3232 }
3233 if (!supports_topology || result == 0) {
3234 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3235 }
3236 }
3237 return result;
3238 }
3239
3240 uint VM_Version::threads_per_core() {
3241 uint result = 1;
3242 if (is_intel() && supports_processor_topology()) {
3243 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3244 } else if (is_zx() && supports_processor_topology()) {
3245 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3246 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3247 if (cpu_family() >= 0x17) {
3248 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3249 } else {
3250 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3251 cores_per_cpu();
3252 }
3253 }
3254 return (result == 0 ? 1 : result);
3255 }
3256
3257 uint VM_Version::L1_line_size() {
3258 uint result = 0;
3259 if (is_intel()) {
3260 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3261 } else if (is_amd_family()) {
3262 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3263 } else if (is_zx()) {
3264 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3265 }
3266 if (result < 32) // not defined ?
3267 result = 32; // 32 bytes by default on x86 and other x64
3268 return result;
3269 }
3270
3271 bool VM_Version::is_intel_tsc_synched_at_init() {
3272 if (is_intel_family_core()) {
3273 uint32_t ext_model = extended_cpu_model();
3274 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3275 ext_model == CPU_MODEL_WESTMERE_EP ||
3276 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3277 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3278 // <= 2-socket invariant tsc support. EX versions are usually used
3279 // in > 2-socket systems and likely don't synchronize tscs at
3280 // initialization.
3281 // Code that uses tsc values must be prepared for them to arbitrarily
3282 // jump forward or backward.
3283 return true;
3284 }
3285 }
3286 return false;
3287 }
3288
3289 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3290 // Hardware prefetching (distance/size in bytes):
3291 // Pentium 3 - 64 / 32
3292 // Pentium 4 - 256 / 128
3293 // Athlon - 64 / 32 ????
3294 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3295 // Core - 128 / 64
3296 //
3297 // Software prefetching (distance in bytes / instruction with best score):
3298 // Pentium 3 - 128 / prefetchnta
3299 // Pentium 4 - 512 / prefetchnta
3300 // Athlon - 128 / prefetchnta
3301 // Opteron - 256 / prefetchnta
3302 // Core - 256 / prefetchnta
3303 // It will be used only when AllocatePrefetchStyle > 0
3304
3305 if (is_amd_family()) { // AMD | Hygon
3306 if (supports_sse2()) {
3307 return 256; // Opteron
3308 } else {
3309 return 128; // Athlon
3310 }
3311 } else if (is_zx()) {
3312 if (supports_sse2()) {
3313 return 256;
3314 } else {
3315 return 128;
3316 }
3317 } else { // Intel
3318 if (supports_sse3() && is_intel_server_family()) {
3319 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3320 return 192;
3321 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3322 return 384;
3323 }
3324 }
3325 if (supports_sse2()) {
3326 if (is_intel_server_family()) {
3327 return 256; // Pentium M, Core, Core2
3328 } else {
3329 return 512; // Pentium 4
3330 }
3331 } else {
3332 return 128; // Pentium 3 (and all other old CPUs)
3333 }
3334 }
3335 }
3336
3337 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3338 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3339 switch (id) {
3340 case vmIntrinsics::_floatToFloat16:
3341 case vmIntrinsics::_float16ToFloat:
3342 if (!supports_float16()) {
3343 return false;
3344 }
3345 break;
3346 default:
3347 break;
3348 }
3349 return true;
3350 }
3351
3352 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3353 int i = 0;
3354 ss.join([&]() {
3355 const char* str = nullptr;
3356 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3357 if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3358 str = _features_names[i];
3359 }
3360 i += 1;
3361 }
3362 return str;
3363 }, ", ");
3364 }
3365
3366 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3367 VM_Features* features = (VM_Features*)features_buffer;
3368 insert_features_names(*features, ss);
3369 }
3370
3371 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3372 VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3373 VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3374 int i = 0;
3375 ss.join([&]() {
3376 const char* str = nullptr;
3377 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3378 Feature_Flag flag = (Feature_Flag)i;
3379 if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3380 str = _features_names[i];
3381 }
3382 i += 1;
3383 }
3384 return str;
3385 }, ", ");
3386 }
3387
3388 int VM_Version::cpu_features_size() {
3389 return sizeof(VM_Features);
3390 }
3391
3392 void VM_Version::store_cpu_features(void* buf) {
3393 VM_Features copy = _features;
3394 copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3395 memcpy(buf, ©, sizeof(VM_Features));
3396 }
3397
3398 bool VM_Version::supports_features(void* features_buffer) {
3399 VM_Features* features_to_test = (VM_Features*)features_buffer;
3400 return _features.supports_features(features_to_test);
3401 }