1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/codeBlob.hpp"
29 #include "compiler/compilerDefinitions.inline.hpp"
30 #include "jvm.h"
31 #include "logging/log.hpp"
32 #include "logging/logStream.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "runtime/globals_extension.hpp"
36 #include "runtime/icache.hpp"
37 #include "runtime/java.hpp"
38 #include "runtime/os.inline.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/vm_version.hpp"
41 #include "utilities/checkedCast.hpp"
42 #include "utilities/ostream.hpp"
43 #include "utilities/powerOfTwo.hpp"
44 #include "utilities/virtualizationSupport.hpp"
45
// CPU family, model and stepping. VM_Version::get_processor_features()
// first sets defaults (486, model 0, stepping 0) and then overwrites them
// from the data returned by get_cpu_info_stub.
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID/XGETBV results; get_processor_features() passes the address of
// this structure to get_cpu_info_stub, which fills it in.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Feature name table: every CPU_FEATURE_FLAGS(id, name) entry contributes
// XSTR(name) as one array element. The helper macro is only needed for this
// expansion and is undefined again right after it.
#define DECLARE_CPU_FEATURE_NAME(id, name) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Blob and size budget for the generated stubs.
// NOTE(review): presumably stub_size is the byte size of the buffer all
// VM_Version stubs are generated into; the allocation is not visible here,
// so confirm before growing any stub.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// _features is the working feature set ("can be changed by VM settings"),
// _cpu_features keeps the copy taken right after detection.
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;
70
// Signatures of the generated stubs. The aliases are declared with C
// language linkage because the stubs are raw machine code that is entered
// using the platform C calling convention.
extern "C" {
  using get_cpu_info_stub_t        = void (*)(void*);
  using detect_virt_stub_t         = void (*)(uint32_t, uint32_t*);
  using clear_apx_test_state_t     = void (*)(void);
  using getCPUIDBrandString_stub_t = void (*)(void*);
}

// Entry points of the generated stubs; null until the stubs are generated.
static get_cpu_info_stub_t        get_cpu_info_stub         = nullptr;
static detect_virt_stub_t         detect_virt_stub          = nullptr;
static clear_apx_test_state_t     clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub  = nullptr;
81
// CPUID leaf numbers (the value loaded into EAX before executing cpuid).

// Standard leaves.
#define CPUID_STANDARD_FN 0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves. CPUID_EXTENDED_FN reports the highest supported extended
// leaf in EAX; leaves 2..4 return the three 16-byte chunks of the processor
// brand string (see generate_getCPUIDBrandString).
#define CPUID_EXTENDED_FN 0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
94
95 class VM_Version_StubGenerator: public StubCodeGenerator {
96 public:
97
// Creates a generator that emits into code buffer 'c'; the buffer is simply
// forwarded to the StubCodeGenerator base class.
VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
99
100 address clear_apx_test_state() {
101 # define __ _masm->
102 address start = __ pc();
103 // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
104 // handling guarantees that preserved register values post signal handling were
105 // re-instantiated by operating system and not because they were not modified externally.
106
107 bool save_apx = UseAPX;
108 VM_Version::set_apx_cpuFeatures();
109 UseAPX = true;
110 // EGPR state save/restoration.
111 __ mov64(r16, 0L);
112 __ mov64(r31, 0L);
113 UseAPX = save_apx;
114 VM_Version::clean_cpuFeatures();
115 __ ret(0);
116 return start;
117 }
118
119 address generate_get_cpu_info() {
120 // Flags to test CPU type.
121 const uint32_t HS_EFL_AC = 0x40000;
122 const uint32_t HS_EFL_ID = 0x200000;
123 // Values for when we don't have a CPUID instruction.
124 const int CPU_FAMILY_SHIFT = 8;
125 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
126 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
127 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
128
129 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
130 Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
131 Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
132 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
133
134 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
135 # define __ _masm->
136
137 address start = __ pc();
138
139 //
140 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
141 //
142 // rcx and rdx are first and second argument registers on windows
143
144 __ push(rbp);
145 __ mov(rbp, c_rarg0); // cpuid_info address
146 __ push(rbx);
147 __ push(rsi);
148 __ pushf(); // preserve rbx, and flags
149 __ pop(rax);
150 __ push(rax);
151 __ mov(rcx, rax);
152 //
153 // if we are unable to change the AC flag, we have a 386
154 //
155 __ xorl(rax, HS_EFL_AC);
156 __ push(rax);
157 __ popf();
158 __ pushf();
159 __ pop(rax);
160 __ cmpptr(rax, rcx);
161 __ jccb(Assembler::notEqual, detect_486);
162
163 __ movl(rax, CPU_FAMILY_386);
164 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
165 __ jmp(done);
166
167 //
168 // If we are unable to change the ID flag, we have a 486 which does
169 // not support the "cpuid" instruction.
170 //
171 __ bind(detect_486);
172 __ mov(rax, rcx);
173 __ xorl(rax, HS_EFL_ID);
174 __ push(rax);
175 __ popf();
176 __ pushf();
177 __ pop(rax);
178 __ cmpptr(rcx, rax);
179 __ jccb(Assembler::notEqual, detect_586);
180
181 __ bind(cpu486);
182 __ movl(rax, CPU_FAMILY_486);
183 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
184 __ jmp(done);
185
186 //
187 // At this point, we have a chip which supports the "cpuid" instruction
188 //
189 __ bind(detect_586);
190 __ xorl(rax, rax);
191 __ cpuid();
192 __ orl(rax, rax);
193 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
194 // value of at least 1, we give up and
195 // assume a 486
196 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
197 __ movl(Address(rsi, 0), rax);
198 __ movl(Address(rsi, 4), rbx);
199 __ movl(Address(rsi, 8), rcx);
200 __ movl(Address(rsi,12), rdx);
201
202 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
203 __ jccb(Assembler::belowEqual, std_cpuid4);
204
205 //
206 // cpuid(0xB) Processor Topology
207 //
208 __ movl(rax, 0xb);
209 __ xorl(rcx, rcx); // Threads level
210 __ cpuid();
211
212 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
213 __ movl(Address(rsi, 0), rax);
214 __ movl(Address(rsi, 4), rbx);
215 __ movl(Address(rsi, 8), rcx);
216 __ movl(Address(rsi,12), rdx);
217
218 __ movl(rax, 0xb);
219 __ movl(rcx, 1); // Cores level
220 __ cpuid();
221 __ push(rax);
222 __ andl(rax, 0x1f); // Determine if valid topology level
223 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
224 __ andl(rax, 0xffff);
225 __ pop(rax);
226 __ jccb(Assembler::equal, std_cpuid4);
227
228 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
229 __ movl(Address(rsi, 0), rax);
230 __ movl(Address(rsi, 4), rbx);
231 __ movl(Address(rsi, 8), rcx);
232 __ movl(Address(rsi,12), rdx);
233
234 __ movl(rax, 0xb);
235 __ movl(rcx, 2); // Packages level
236 __ cpuid();
237 __ push(rax);
238 __ andl(rax, 0x1f); // Determine if valid topology level
239 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
240 __ andl(rax, 0xffff);
241 __ pop(rax);
242 __ jccb(Assembler::equal, std_cpuid4);
243
244 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
245 __ movl(Address(rsi, 0), rax);
246 __ movl(Address(rsi, 4), rbx);
247 __ movl(Address(rsi, 8), rcx);
248 __ movl(Address(rsi,12), rdx);
249
250 //
251 // cpuid(0x4) Deterministic cache params
252 //
253 __ bind(std_cpuid4);
254 __ movl(rax, 4);
255 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
256 __ jccb(Assembler::greater, std_cpuid1);
257
258 __ xorl(rcx, rcx); // L1 cache
259 __ cpuid();
260 __ push(rax);
261 __ andl(rax, 0x1f); // Determine if valid cache parameters used
262 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache
263 __ pop(rax);
264 __ jccb(Assembler::equal, std_cpuid1);
265
266 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
267 __ movl(Address(rsi, 0), rax);
268 __ movl(Address(rsi, 4), rbx);
269 __ movl(Address(rsi, 8), rcx);
270 __ movl(Address(rsi,12), rdx);
271
272 //
273 // Standard cpuid(0x1)
274 //
275 __ bind(std_cpuid1);
276 __ movl(rax, 1);
277 __ cpuid();
278 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
279 __ movl(Address(rsi, 0), rax);
280 __ movl(Address(rsi, 4), rbx);
281 __ movl(Address(rsi, 8), rcx);
282 __ movl(Address(rsi,12), rdx);
283
284 //
285 // Check if OS has enabled XGETBV instruction to access XCR0
286 // (OSXSAVE feature flag) and CPU supports AVX
287 //
288 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
289 __ cmpl(rcx, 0x18000000);
290 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
291
292 //
293 // XCR0, XFEATURE_ENABLED_MASK register
294 //
295 __ xorl(rcx, rcx); // zero for XCR0 register
296 __ xgetbv();
297 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
298 __ movl(Address(rsi, 0), rax);
299 __ movl(Address(rsi, 4), rdx);
300
301 //
302 // cpuid(0x7) Structured Extended Features Enumeration Leaf.
303 //
304 __ bind(sef_cpuid);
305 __ movl(rax, 7);
306 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
307 __ jccb(Assembler::greater, ext_cpuid);
308 // ECX = 0
309 __ xorl(rcx, rcx);
310 __ cpuid();
311 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
312 __ movl(Address(rsi, 0), rax);
313 __ movl(Address(rsi, 4), rbx);
314 __ movl(Address(rsi, 8), rcx);
315 __ movl(Address(rsi, 12), rdx);
316
317 //
318 // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
319 //
320 __ bind(sefsl1_cpuid);
321 __ movl(rax, 7);
322 __ movl(rcx, 1);
323 __ cpuid();
324 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
325 __ movl(Address(rsi, 0), rax);
326 __ movl(Address(rsi, 4), rdx);
327
328 //
329 // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
330 //
331 __ bind(std_cpuid29);
332 __ movl(rax, 0x29);
333 __ movl(rcx, 0);
334 __ cpuid();
335 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
336 __ movl(Address(rsi, 0), rbx);
337
338 //
339 // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
340 //
341 __ bind(std_cpuid24);
342 __ movl(rax, 0x24);
343 __ movl(rcx, 0);
344 __ cpuid();
345 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
346 __ movl(Address(rsi, 0), rax);
347 __ movl(Address(rsi, 4), rbx);
348
349 //
350 // Extended cpuid(0x80000000)
351 //
352 __ bind(ext_cpuid);
353 __ movl(rax, 0x80000000);
354 __ cpuid();
355 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
356 __ jcc(Assembler::belowEqual, done);
357 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
358 __ jcc(Assembler::belowEqual, ext_cpuid1);
359 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
360 __ jccb(Assembler::belowEqual, ext_cpuid5);
361 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
362 __ jccb(Assembler::belowEqual, ext_cpuid7);
363 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
364 __ jccb(Assembler::belowEqual, ext_cpuid8);
365 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
366 __ jccb(Assembler::below, ext_cpuid8);
367 //
368 // Extended cpuid(0x8000001E)
369 //
370 __ movl(rax, 0x8000001E);
371 __ cpuid();
372 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
373 __ movl(Address(rsi, 0), rax);
374 __ movl(Address(rsi, 4), rbx);
375 __ movl(Address(rsi, 8), rcx);
376 __ movl(Address(rsi,12), rdx);
377
378 //
379 // Extended cpuid(0x80000008)
380 //
381 __ bind(ext_cpuid8);
382 __ movl(rax, 0x80000008);
383 __ cpuid();
384 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
385 __ movl(Address(rsi, 0), rax);
386 __ movl(Address(rsi, 4), rbx);
387 __ movl(Address(rsi, 8), rcx);
388 __ movl(Address(rsi,12), rdx);
389
390 //
391 // Extended cpuid(0x80000007)
392 //
393 __ bind(ext_cpuid7);
394 __ movl(rax, 0x80000007);
395 __ cpuid();
396 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
397 __ movl(Address(rsi, 0), rax);
398 __ movl(Address(rsi, 4), rbx);
399 __ movl(Address(rsi, 8), rcx);
400 __ movl(Address(rsi,12), rdx);
401
402 //
403 // Extended cpuid(0x80000005)
404 //
405 __ bind(ext_cpuid5);
406 __ movl(rax, 0x80000005);
407 __ cpuid();
408 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
409 __ movl(Address(rsi, 0), rax);
410 __ movl(Address(rsi, 4), rbx);
411 __ movl(Address(rsi, 8), rcx);
412 __ movl(Address(rsi,12), rdx);
413
414 //
415 // Extended cpuid(0x80000001)
416 //
417 __ bind(ext_cpuid1);
418 __ movl(rax, 0x80000001);
419 __ cpuid();
420 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
421 __ movl(Address(rsi, 0), rax);
422 __ movl(Address(rsi, 4), rbx);
423 __ movl(Address(rsi, 8), rcx);
424 __ movl(Address(rsi,12), rdx);
425
426 //
427 // Check if OS has enabled XGETBV instruction to access XCR0
428 // (OSXSAVE feature flag) and CPU supports APX
429 //
430 // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
431 // and XCRO[19] bit for OS support to save/restore extended GPR state.
432 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
433 __ movl(rax, 0x200000);
434 __ andl(rax, Address(rsi, 4));
435 __ jcc(Assembler::equal, vector_save_restore);
436 // check _cpuid_info.xem_xcr0_eax.bits.apx_f
437 __ movl(rax, 0x80000);
438 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
439 __ jcc(Assembler::equal, vector_save_restore);
440
441 bool save_apx = UseAPX;
442 VM_Version::set_apx_cpuFeatures();
443 UseAPX = true;
444 __ mov64(r16, VM_Version::egpr_test_value());
445 __ mov64(r31, VM_Version::egpr_test_value());
446 __ xorl(rsi, rsi);
447 VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
448 // Generate SEGV
449 __ movl(rax, Address(rsi, 0));
450
451 VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
452 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
453 __ movq(Address(rsi, 0), r16);
454 __ movq(Address(rsi, 8), r31);
455
456 //
457 // Query CPUID 0xD.19 for APX XSAVE offset
458 // Extended State Enumeration Sub-leaf 19 (APX)
459 // EAX = size of APX state (should be 128)
460 // EBX = offset in standard XSAVE format
461 //
462 __ movl(rax, 0xD);
463 __ movl(rcx, 19);
464 __ cpuid();
465 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
466 __ movl(Address(rsi, 0), rax);
467 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
468 __ movl(Address(rsi, 0), rbx);
469
470 UseAPX = save_apx;
471 __ bind(vector_save_restore);
472 //
473 // Check if OS has enabled XGETBV instruction to access XCR0
474 // (OSXSAVE feature flag) and CPU supports AVX
475 //
476 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
477 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
478 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
479 __ cmpl(rcx, 0x18000000);
480 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
481
482 __ movl(rax, 0x6);
483 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
484 __ cmpl(rax, 0x6);
485 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
486
487 // we need to bridge farther than imm8, so we use this island as a thunk
488 __ bind(done);
489 __ jmp(wrapup);
490
491 __ bind(start_simd_check);
492 // Query CPUID 0xD sub-leaf 5, 6, and 7 offsets for AVX-512 XSAVE components
493 __ movl(rax, 0xD);
494 __ movl(rcx, 5);
495 __ cpuid();
496 __ lea(rsi, Address(rbp, in_bytes(VM_Version::opmask_xstate_offset_offset())));
497 __ movl(Address(rsi, 0), rbx);
498
499 __ movl(rax, 0xD);
500 __ movl(rcx, 6);
501 __ cpuid();
502 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm0to15_hi256_xstate_offset_offset())));
503 __ movl(Address(rsi, 0), rbx);
504
505 __ movl(rax, 0xD);
506 __ movl(rcx, 7);
507 __ cpuid();
508 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm16to31_xstate_offset_offset())));
509 __ movl(Address(rsi, 0), rbx);
510
511 //
512 // Some OSs have a bug when upper 128/256bits of YMM/ZMM
513 // registers are not restored after a signal processing.
514 // Generate SEGV here (reference through null)
515 // and check upper YMM/ZMM bits after it.
516 //
517 int saved_useavx = UseAVX;
518
519 // If UseAVX is uninitialized or is set by the user to include EVEX
520 if (use_evex) {
521 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
522 // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
523 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
524 __ movl(rax, 0x10000);
525 __ andl(rax, Address(rsi, 4));
526 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
527 __ movl(rbx, 0x80000);
528 __ andl(rbx, Address(rsi, 4));
529 __ orl(rax, rbx);
530 __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
531 // check _cpuid_info.xem_xcr0_eax.bits.opmask
532 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
533 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
534 __ movl(rax, 0xE0);
535 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
536 __ cmpl(rax, 0xE0);
537 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
538
539 if (FLAG_IS_DEFAULT(UseAVX)) {
540 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
541 __ movl(rax, Address(rsi, 0));
542 __ cmpl(rax, 0x50654); // If it is Skylake
543 __ jcc(Assembler::equal, legacy_setup);
544 }
545 // EVEX setup: run in lowest evex mode
546 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
547 UseAVX = 3;
548 #ifdef _WINDOWS
549 // xmm5-xmm15 are not preserved by caller on windows
550 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
551 __ subptr(rsp, 64);
552 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
553 __ subptr(rsp, 64);
554 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
555 __ subptr(rsp, 64);
556 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
557 #endif // _WINDOWS
558
559 // load value into all 64 bytes of zmm7 register
560 __ movl(rcx, VM_Version::ymm_test_value());
561 __ movdl(xmm0, rcx);
562 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
563 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
564 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
565 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
566 VM_Version::clean_cpuFeatures();
567 __ jmp(save_restore_except);
568 }
569
570 __ bind(legacy_setup);
571 // AVX setup
572 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
573 UseAVX = 1;
574 #ifdef _WINDOWS
575 __ subptr(rsp, 32);
576 __ vmovdqu(Address(rsp, 0), xmm7);
577 __ subptr(rsp, 32);
578 __ vmovdqu(Address(rsp, 0), xmm8);
579 __ subptr(rsp, 32);
580 __ vmovdqu(Address(rsp, 0), xmm15);
581 #endif // _WINDOWS
582
583 // load value into all 32 bytes of ymm7 register
584 __ movl(rcx, VM_Version::ymm_test_value());
585
586 __ movdl(xmm0, rcx);
587 __ pshufd(xmm0, xmm0, 0x00);
588 __ vinsertf128_high(xmm0, xmm0);
589 __ vmovdqu(xmm7, xmm0);
590 __ vmovdqu(xmm8, xmm0);
591 __ vmovdqu(xmm15, xmm0);
592 VM_Version::clean_cpuFeatures();
593
594 __ bind(save_restore_except);
595 __ xorl(rsi, rsi);
596 VM_Version::set_cpuinfo_segv_addr(__ pc());
597 // Generate SEGV
598 __ movl(rax, Address(rsi, 0));
599
600 VM_Version::set_cpuinfo_cont_addr(__ pc());
601 // Returns here after signal. Save xmm0 to check it later.
602
603 // If UseAVX is uninitialized or is set by the user to include EVEX
604 if (use_evex) {
605 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
606 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
607 __ movl(rax, 0x10000);
608 __ andl(rax, Address(rsi, 4));
609 __ jcc(Assembler::equal, legacy_save_restore);
610 // check _cpuid_info.xem_xcr0_eax.bits.opmask
611 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
612 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
613 __ movl(rax, 0xE0);
614 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
615 __ cmpl(rax, 0xE0);
616 __ jcc(Assembler::notEqual, legacy_save_restore);
617
618 if (FLAG_IS_DEFAULT(UseAVX)) {
619 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
620 __ movl(rax, Address(rsi, 0));
621 __ cmpl(rax, 0x50654); // If it is Skylake
622 __ jcc(Assembler::equal, legacy_save_restore);
623 }
624 // EVEX check: run in lowest evex mode
625 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
626 UseAVX = 3;
627 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
628 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
629 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
630 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
631 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
632
633 #ifdef _WINDOWS
634 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
635 __ addptr(rsp, 64);
636 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
637 __ addptr(rsp, 64);
638 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
639 __ addptr(rsp, 64);
640 #endif // _WINDOWS
641 generate_vzeroupper(wrapup);
642 VM_Version::clean_cpuFeatures();
643 UseAVX = saved_useavx;
644 __ jmp(wrapup);
645 }
646
647 __ bind(legacy_save_restore);
648 // AVX check
649 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
650 UseAVX = 1;
651 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
652 __ vmovdqu(Address(rsi, 0), xmm0);
653 __ vmovdqu(Address(rsi, 32), xmm7);
654 __ vmovdqu(Address(rsi, 64), xmm8);
655 __ vmovdqu(Address(rsi, 96), xmm15);
656
657 #ifdef _WINDOWS
658 __ vmovdqu(xmm15, Address(rsp, 0));
659 __ addptr(rsp, 32);
660 __ vmovdqu(xmm8, Address(rsp, 0));
661 __ addptr(rsp, 32);
662 __ vmovdqu(xmm7, Address(rsp, 0));
663 __ addptr(rsp, 32);
664 #endif // _WINDOWS
665
666 generate_vzeroupper(wrapup);
667 VM_Version::clean_cpuFeatures();
668 UseAVX = saved_useavx;
669
670 __ bind(wrapup);
671 __ popf();
672 __ pop(rsi);
673 __ pop(rbx);
674 __ pop(rbp);
675 __ ret(0);
676
677 # undef __
678
679 return start;
680 };
681 void generate_vzeroupper(Label& L_wrapup) {
682 # define __ _masm->
683 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
684 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
685 __ jcc(Assembler::notEqual, L_wrapup);
686 __ movl(rcx, 0x0FFF0FF0);
687 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
688 __ andl(rcx, Address(rsi, 0));
689 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
690 __ jcc(Assembler::equal, L_wrapup);
691 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
692 __ jcc(Assembler::equal, L_wrapup);
693 // vzeroupper() will use a pre-computed instruction sequence that we
694 // can't compute until after we've determined CPU capabilities. Use
695 // uncached variant here directly to be able to bootstrap correctly
696 __ vzeroupper_uncached();
697 # undef __
698 }
699 address generate_detect_virt() {
700 StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
701 # define __ _masm->
702
703 address start = __ pc();
704
705 // Evacuate callee-saved registers
706 __ push(rbp);
707 __ push(rbx);
708 __ push(rsi); // for Windows
709
710 __ mov(rax, c_rarg0); // CPUID leaf
711 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
712
713 __ cpuid();
714
715 // Store result to register array
716 __ movl(Address(rsi, 0), rax);
717 __ movl(Address(rsi, 4), rbx);
718 __ movl(Address(rsi, 8), rcx);
719 __ movl(Address(rsi, 12), rdx);
720
721 // Epilogue
722 __ pop(rsi);
723 __ pop(rbx);
724 __ pop(rbp);
725 __ ret(0);
726
727 # undef __
728
729 return start;
730 };
731
732
733 address generate_getCPUIDBrandString(void) {
734 // Flags to test CPU type.
735 const uint32_t HS_EFL_AC = 0x40000;
736 const uint32_t HS_EFL_ID = 0x200000;
737 // Values for when we don't have a CPUID instruction.
738 const int CPU_FAMILY_SHIFT = 8;
739 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
740 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
741
742 Label detect_486, cpu486, detect_586, done, ext_cpuid;
743
744 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
745 # define __ _masm->
746
747 address start = __ pc();
748
749 //
750 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
751 //
752 // rcx and rdx are first and second argument registers on windows
753
754 __ push(rbp);
755 __ mov(rbp, c_rarg0); // cpuid_info address
756 __ push(rbx);
757 __ push(rsi);
758 __ pushf(); // preserve rbx, and flags
759 __ pop(rax);
760 __ push(rax);
761 __ mov(rcx, rax);
762 //
763 // if we are unable to change the AC flag, we have a 386
764 //
765 __ xorl(rax, HS_EFL_AC);
766 __ push(rax);
767 __ popf();
768 __ pushf();
769 __ pop(rax);
770 __ cmpptr(rax, rcx);
771 __ jccb(Assembler::notEqual, detect_486);
772
773 __ movl(rax, CPU_FAMILY_386);
774 __ jmp(done);
775
776 //
777 // If we are unable to change the ID flag, we have a 486 which does
778 // not support the "cpuid" instruction.
779 //
780 __ bind(detect_486);
781 __ mov(rax, rcx);
782 __ xorl(rax, HS_EFL_ID);
783 __ push(rax);
784 __ popf();
785 __ pushf();
786 __ pop(rax);
787 __ cmpptr(rcx, rax);
788 __ jccb(Assembler::notEqual, detect_586);
789
790 __ bind(cpu486);
791 __ movl(rax, CPU_FAMILY_486);
792 __ jmp(done);
793
794 //
795 // At this point, we have a chip which supports the "cpuid" instruction
796 //
797 __ bind(detect_586);
798 __ xorl(rax, rax);
799 __ cpuid();
800 __ orl(rax, rax);
801 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
802 // value of at least 1, we give up and
803 // assume a 486
804
805 //
806 // Extended cpuid(0x80000000) for processor brand string detection
807 //
808 __ bind(ext_cpuid);
809 __ movl(rax, CPUID_EXTENDED_FN);
810 __ cpuid();
811 __ cmpl(rax, CPUID_EXTENDED_FN_4);
812 __ jcc(Assembler::below, done);
813
814 //
815 // Extended cpuid(0x80000002) // first 16 bytes in brand string
816 //
817 __ movl(rax, CPUID_EXTENDED_FN_2);
818 __ cpuid();
819 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
820 __ movl(Address(rsi, 0), rax);
821 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
822 __ movl(Address(rsi, 0), rbx);
823 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
824 __ movl(Address(rsi, 0), rcx);
825 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
826 __ movl(Address(rsi,0), rdx);
827
828 //
829 // Extended cpuid(0x80000003) // next 16 bytes in brand string
830 //
831 __ movl(rax, CPUID_EXTENDED_FN_3);
832 __ cpuid();
833 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
834 __ movl(Address(rsi, 0), rax);
835 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
836 __ movl(Address(rsi, 0), rbx);
837 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
838 __ movl(Address(rsi, 0), rcx);
839 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
840 __ movl(Address(rsi,0), rdx);
841
842 //
843 // Extended cpuid(0x80000004) // last 16 bytes in brand string
844 //
845 __ movl(rax, CPUID_EXTENDED_FN_4);
846 __ cpuid();
847 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
848 __ movl(Address(rsi, 0), rax);
849 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
850 __ movl(Address(rsi, 0), rbx);
851 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
852 __ movl(Address(rsi, 0), rcx);
853 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
854 __ movl(Address(rsi,0), rdx);
855
856 //
857 // return
858 //
859 __ bind(done);
860 __ popf();
861 __ pop(rsi);
862 __ pop(rbx);
863 __ pop(rbp);
864 __ ret(0);
865
866 # undef __
867
868 return start;
869 };
870 };
871
872 void VM_Version::get_processor_features() {
873
874 _cpu = 4; // 486 by default
875 _model = 0;
876 _stepping = 0;
877 _logical_processors_per_package = 1;
878 // i486 internal cache is both I&D and has a 16-byte line size
879 _L1_data_cache_line_size = 16;
880
881 // Get raw processor info
882
883 get_cpu_info_stub(&_cpuid_info);
884
885 assert_is_initialized();
886 _cpu = extended_cpu_family();
887 _model = extended_cpu_model();
888 _stepping = cpu_stepping();
889
890 if (cpu_family() > 4) { // it supports CPUID
891 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
892 _cpu_features = _features; // Preserve features
893 // Logical processors are only available on P4s and above,
894 // and only if hyperthreading is available.
895 _logical_processors_per_package = logical_processor_count();
896 _L1_data_cache_line_size = L1_line_size();
897 }
898
899 // xchg and xadd instructions
900 _supports_atomic_getset4 = true;
901 _supports_atomic_getadd4 = true;
902 _supports_atomic_getset8 = true;
903 _supports_atomic_getadd8 = true;
904
905 // assigning this field effectively enables Unsafe.writebackMemory()
906 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
907 // that is only implemented on x86_64 and only if the OS plays ball
908 if (os::supports_map_sync()) {
909 // publish data cache line flush size to generic field, otherwise
910 // let if default to zero thereby disabling writeback
911 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
912 }
913
914 // Check if processor has Intel Ecore
915 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
916 (supports_hybrid() ||
917 _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
918 _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
919 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
920 }
921
922 if (UseSSE < 4) {
923 clear_feature(CPU_SSE4_1);
924 clear_feature(CPU_SSE4_2);
925 }
926
927 if (UseSSE < 3) {
928 clear_feature(CPU_SSE3);
929 clear_feature(CPU_SSSE3);
930 clear_feature(CPU_SSE4A);
931 }
932
933 // ZX cpus specific settings
934 if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
935 if (cpu_family() == 7) {
936 if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
937 UseAVX = 1;
938 } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
939 UseAVX = 0;
940 }
941 } else if (cpu_family() == 6) {
942 UseAVX = 0;
943 }
944 }
945
946 // UseSSE is set to the smaller of what hardware supports and what
947 // the command line requires. i.e., you cannot set UseSSE to 4 on
948 // older systems which do not support it.
949 int use_sse_limit = 2;
950 if (UseSSE > 3 && supports_sse4_1()) {
951 use_sse_limit = 4;
952 } else if (UseSSE > 2 && supports_sse3()) {
953 use_sse_limit = 3;
954 }
955 if (FLAG_IS_DEFAULT(UseSSE)) {
956 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
957 } else if (UseSSE > use_sse_limit) {
958 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
959 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
960 }
961
962 // first try initial setting and detect what we can support
963 int use_avx_limit = 0;
964 if (UseAVX > 0) {
965 if (UseSSE < 4) {
966 // Don't use AVX if SSE is unavailable or has been disabled.
967 use_avx_limit = 0;
968 } else if (UseAVX > 2 && supports_evex()) {
969 use_avx_limit = 3;
970 } else if (UseAVX > 1 && supports_avx2()) {
971 use_avx_limit = 2;
972 } else if (UseAVX > 0 && supports_avx()) {
973 use_avx_limit = 1;
974 } else {
975 use_avx_limit = 0;
976 }
977 }
978 if (FLAG_IS_DEFAULT(UseAVX)) {
979 // Don't use AVX-512 on older Skylakes unless explicitly requested.
980 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
981 FLAG_SET_DEFAULT(UseAVX, 2);
982 } else {
983 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
984 }
985 }
986
987 if (UseAVX > use_avx_limit) {
988 if (UseSSE < 4) {
989 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
990 } else {
991 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
992 }
993 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
994 }
995
996 if (UseAVX < 3) {
997 clear_feature(CPU_AVX512F);
998 clear_feature(CPU_AVX512DQ);
999 clear_feature(CPU_AVX512CD);
1000 clear_feature(CPU_AVX512BW);
1001 clear_feature(CPU_AVX512ER);
1002 clear_feature(CPU_AVX512PF);
1003 clear_feature(CPU_AVX512VL);
1004 clear_feature(CPU_AVX512_VPOPCNTDQ);
1005 clear_feature(CPU_AVX512_VPCLMULQDQ);
1006 clear_feature(CPU_AVX512_VAES);
1007 clear_feature(CPU_AVX512_VNNI);
1008 clear_feature(CPU_AVX512_VBMI);
1009 clear_feature(CPU_AVX512_VBMI2);
1010 clear_feature(CPU_AVX512_BITALG);
1011 clear_feature(CPU_AVX512_IFMA);
1012 clear_feature(CPU_APX_F);
1013 clear_feature(CPU_AVX512_FP16);
1014 clear_feature(CPU_AVX10_1);
1015 clear_feature(CPU_AVX10_2);
1016 }
1017
1018
1019 if (UseAVX < 2) {
1020 clear_feature(CPU_AVX2);
1021 clear_feature(CPU_AVX_IFMA);
1022 }
1023
1024 if (UseAVX < 1) {
1025 clear_feature(CPU_AVX);
1026 clear_feature(CPU_VZEROUPPER);
1027 clear_feature(CPU_F16C);
1028 clear_feature(CPU_SHA512);
1029 }
1030
1031 if (logical_processors_per_package() == 1) {
1032 // HT processor could be installed on a system which doesn't support HT.
1033 clear_feature(CPU_HT);
1034 }
1035
1036 if (is_intel()) { // Intel cpus specific settings
1037 if (is_knights_family()) {
1038 clear_feature(CPU_VZEROUPPER);
1039 clear_feature(CPU_AVX512BW);
1040 clear_feature(CPU_AVX512VL);
1041 clear_feature(CPU_APX_F);
1042 clear_feature(CPU_AVX512DQ);
1043 clear_feature(CPU_AVX512_VNNI);
1044 clear_feature(CPU_AVX512_VAES);
1045 clear_feature(CPU_AVX512_VPOPCNTDQ);
1046 clear_feature(CPU_AVX512_VPCLMULQDQ);
1047 clear_feature(CPU_AVX512_VBMI);
1048 clear_feature(CPU_AVX512_VBMI2);
1049 clear_feature(CPU_CLWB);
1050 clear_feature(CPU_FLUSHOPT);
1051 clear_feature(CPU_GFNI);
1052 clear_feature(CPU_AVX512_BITALG);
1053 clear_feature(CPU_AVX512_IFMA);
1054 clear_feature(CPU_AVX_IFMA);
1055 clear_feature(CPU_AVX512_FP16);
1056 clear_feature(CPU_AVX10_1);
1057 clear_feature(CPU_AVX10_2);
1058 }
1059 }
1060
1061 // Currently APX support is only enabled for targets supporting AVX512VL feature.
1062 if (supports_apx_f() && os_supports_apx_egprs() && supports_avx512vl()) {
1063 if (FLAG_IS_DEFAULT(UseAPX)) {
1064 FLAG_SET_DEFAULT(UseAPX, false); // by default UseAPX is false
1065 clear_feature(CPU_APX_F);
1066 } else if (!UseAPX) {
1067 clear_feature(CPU_APX_F);
1068 }
1069 } else {
1070 if (!os_supports_apx_egprs() || !supports_avx512vl()) {
1071 clear_feature(CPU_APX_F);
1072 }
1073 if (UseAPX) {
1074 if (!FLAG_IS_DEFAULT(UseAPX)) {
1075 warning("APX instructions are not available on this CPU");
1076 }
1077 FLAG_SET_DEFAULT(UseAPX, false);
1078 }
1079 }
1080
1081 CHECK_CPU_FEATURE(UseCLMUL, CLMUL, supports_clmul(), "CLMUL" MULTI_INST_WARNING_MSG);
1082 CHECK_CPU_FEATURE(UseAES, AES, supports_aes(), "AES" MULTI_INST_WARNING_MSG);
1083 CHECK_CPU_FEATURE(UseFMA, FMA, supports_fma(), "FMA" MULTI_INST_WARNING_MSG);
1084 CHECK_CPU_FEATURE(UseCountLeadingZerosInstruction, LZCNT, supports_lzcnt(), "lzcnt" SINGLE_INST_WARNING_MSG);
1085 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1086 // VEX prefix is generated only when AVX > 0.
1087 CHECK_CPU_FEATURE(UseBMI1Instructions, BMI1, supports_bmi1(), "BMI1" MULTI_INST_WARNING_MSG);
1088
1089 if (supports_bmi2() && supports_avx()) {
1090 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1091 FLAG_SET_DEFAULT(UseBMI2Instructions, true);
1092 } else if (!UseBMI2Instructions) {
1093 clear_feature(CPU_BMI2);
1094 }
1095 } else {
1096 if (!supports_avx()) {
1097 clear_feature(CPU_BMI2);
1098 }
1099 if (UseBMI2Instructions) {
1100 if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1101 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1102 }
1103 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1104 }
1105 }
1106
1107 CHECK_CPU_FEATURE(UsePopCountInstruction, POPCNT, supports_popcnt(), "popcnt" SINGLE_INST_WARNING_MSG);
1108 CHECK_CPU_FEATURE(UseSHA, SHA, supports_sha() || (supports_avx2() && supports_bmi2()), "SHA" MULTI_INST_WARNING_MSG);
1109
1110 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1111 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1112 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1113 } else {
1114 _has_intel_jcc_erratum = IntelJccErratumMitigation;
1115 }
1116
1117 if (X86ICacheSync == -1) {
1118 // Auto-detect, choosing the best-performing one that still flushes
1119 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1120 if (supports_clwb()) {
1121 FLAG_SET_ERGO(X86ICacheSync, 3);
1122 } else if (supports_clflushopt()) {
1123 FLAG_SET_ERGO(X86ICacheSync, 2);
1124 } else {
1125 FLAG_SET_ERGO(X86ICacheSync, 1);
1126 }
1127 } else {
1128 if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1129 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1130 }
1131 if ((X86ICacheSync == 3) && !supports_clwb()) {
1132 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1133 }
1134 if ((X86ICacheSync == 5) && !supports_serialize()) {
1135 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1136 }
1137 }
1138
1139 stringStream ss(2048);
1140 if (supports_hybrid()) {
1141 ss.print("(hybrid)");
1142 } else {
1143 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1144 }
1145 ss.print(" family %d model %d stepping %d microcode 0x%x",
1146 cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1147 ss.print(", ");
1148 int features_offset = (int)ss.size();
1149 insert_features_names(_features, ss);
1150
1151 _cpu_info_string = ss.as_string(true);
1152 _features_string = _cpu_info_string + features_offset;
1153
1154 // Use AES instructions if available.
1155 if (supports_aes()) {
1156 if (supports_sse3()) {
1157 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1158 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1159 }
1160 } else if (UseAESIntrinsics) {
1161 // The AES intrinsic stubs require AES instruction support (of course)
1162 // but also require sse3 mode or higher for the instructions they use.
1163 if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1164 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1165 }
1166 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1167 }
1168 if (!UseAESIntrinsics) {
1169 if (UseAESCTRIntrinsics) {
1170 if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1171 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1172 }
1173 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1174 }
1175 } else {
1176 if (supports_sse4_1()) {
1177 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1178 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1179 }
1180 } else if (UseAESCTRIntrinsics) {
1181 // The AES-CTR intrinsic stubs require AES instruction support (of course)
1182 // but also require sse4.1 mode or higher for the instructions they use.
1183 if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1184 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1185 }
1186 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1187 }
1188 }
1189 } else {
1190 if (!cpu_supports_aes()) {
1191 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1192 warning("AES intrinsics are not available on this CPU");
1193 }
1194 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1195 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1196 warning("AES-CTR intrinsics are not available on this CPU");
1197 }
1198 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1199 } else if (!UseAES) {
1200 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1201 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1202 }
1203 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1204 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1205 warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1206 }
1207 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1208 }
1209 }
1210
1211 if (UseCLMUL && (UseSSE > 2)) {
1212 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1213 UseCRC32Intrinsics = true;
1214 }
1215 } else if (UseCRC32Intrinsics) {
1216 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1217 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1218 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1219 }
1220
1221 if (supports_avx2()) {
1222 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1223 UseAdler32Intrinsics = true;
1224 }
1225 } else if (UseAdler32Intrinsics) {
1226 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1227 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1228 }
1229 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1230 }
1231
1232 if (supports_sse4_2() && supports_clmul()) {
1233 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1234 UseCRC32CIntrinsics = true;
1235 }
1236 } else if (UseCRC32CIntrinsics) {
1237 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1238 warning("CRC32C intrinsics are not available on this CPU");
1239 }
1240 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1241 }
1242
1243 // GHASH/GCM intrinsics
1244 if (UseCLMUL && (UseSSE > 2)) {
1245 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1246 UseGHASHIntrinsics = true;
1247 }
1248 } else if (UseGHASHIntrinsics) {
1249 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1250 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1251 }
1252 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1253 }
1254
1255 // ChaCha20 Intrinsics
1256 // As long as the system supports AVX as a baseline we can do a
1257 // SIMD-enabled block function. StubGenerator makes the determination
1258 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1259 // version.
1260 if (UseAVX >= 1) {
1261 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1262 UseChaCha20Intrinsics = true;
1263 }
1264 } else if (UseChaCha20Intrinsics) {
1265 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1266 warning("ChaCha20 intrinsic requires AVX instructions");
1267 }
1268 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1269 }
1270
1271 // Kyber Intrinsics
1272 // Currently we only have them for AVX512
1273 if (supports_evex() && supports_avx512bw()) {
1274 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1275 UseKyberIntrinsics = true;
1276 }
1277 } else if (UseKyberIntrinsics) {
1278 if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1279 warning("Intrinsics for ML-KEM are not available on this CPU.");
1280 }
1281 FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1282 }
1283
1284 // Dilithium Intrinsics
1285 if (UseAVX > 1) {
1286 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1287 UseDilithiumIntrinsics = true;
1288 }
1289 } else if (UseDilithiumIntrinsics) {
1290 if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1291 warning("Intrinsics for ML-DSA are not available on this CPU.");
1292 }
1293 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1294 }
1295
1296 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1297 if (UseAVX >= 2) {
1298 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1299 UseBASE64Intrinsics = true;
1300 }
1301 } else if (UseBASE64Intrinsics) {
1302 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1303 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1304 }
1305 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1306 }
1307
1308 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1309 UseMD5Intrinsics = true;
1310 }
1311
1312 if (supports_sha() && supports_sse4_1() && UseSHA) {
1313 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1314 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1315 }
1316 } else if (UseSHA1Intrinsics) {
1317 if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1318 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1319 }
1320 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1321 }
1322
1323 if (supports_sse4_1() && UseSHA) {
1324 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1325 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1326 }
1327 } else if (UseSHA256Intrinsics) {
1328 if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1329 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1330 }
1331 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1332 }
1333
1334 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1335 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1336 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1337 }
1338 } else if (UseSHA512Intrinsics) {
1339 if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1340 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1341 }
1342 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1343 }
1344
1345 if (UseSHA && ((supports_evex() && supports_avx512vlbw()) ||
1346 (EnableX86ECoreOpts && !supports_hybrid()))) {
1347 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1348 FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1349 }
1350 } else if (UseSHA3Intrinsics) {
1351 if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1352 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1353 }
1354 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1355 }
1356
1357 #ifdef COMPILER2
1358 int max_vector_size = 0;
1359 if (UseAVX == 0 || !os_supports_avx_vectors()) {
1360 // 16 byte vectors (in XMM) are supported with SSE2+
1361 max_vector_size = 16;
1362 } else if (UseAVX == 1 || UseAVX == 2) {
1363 // 32 bytes vectors (in YMM) are only supported with AVX+
1364 max_vector_size = 32;
1365 } else if (UseAVX > 2) {
1366 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1367 max_vector_size = 64;
1368 }
1369
1370 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1371
1372 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1373 if (MaxVectorSize < min_vector_size) {
1374 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1375 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1376 }
1377 if (MaxVectorSize > max_vector_size) {
1378 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1379 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1380 }
1381 if (!is_power_of_2(MaxVectorSize)) {
1382 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1383 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1384 }
1385 } else {
1386 // If default, use highest supported configuration
1387 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1388 }
1389
1390 #ifdef ASSERT
1391 if (MaxVectorSize > 0) {
1392 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1393 tty->print_cr("State of YMM registers after signal handle:");
1394 int nreg = 4;
1395 const char* ymm_name[4] = {"0", "7", "8", "15"};
1396 for (int i = 0; i < nreg; i++) {
1397 tty->print("YMM%s:", ymm_name[i]);
1398 for (int j = 7; j >=0; j--) {
1399 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1400 }
1401 tty->cr();
1402 }
1403 }
1404 }
1405 #endif // ASSERT
1406
1407 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1408 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1409 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1410 }
1411 } else if (UsePoly1305Intrinsics) {
1412 if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1413 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1414 }
1415 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1416 }
1417
1418 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1419 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1420 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1421 }
1422 } else if (UseIntPolyIntrinsics) {
1423 if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1424 warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1425 }
1426 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1427 }
1428
1429 if (FLAG_IS_DEFAULT(UseIntPoly25519Intrinsics)) {
1430 UseIntPoly25519Intrinsics = true;
1431 }
1432
1433 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1434 UseMultiplyToLenIntrinsic = true;
1435 }
1436 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1437 UseSquareToLenIntrinsic = true;
1438 }
1439 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1440 UseMulAddIntrinsic = true;
1441 }
1442 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1443 UseMontgomeryMultiplyIntrinsic = true;
1444 }
1445 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1446 UseMontgomerySquareIntrinsic = true;
1447 }
1448 #endif // COMPILER2
1449
1450 // On new cpus instructions which update whole XMM register should be used
1451 // to prevent partial register stall due to dependencies on high half.
1452 //
1453 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1454 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1455 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1456 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1457
1458
1459 if (is_zx()) { // ZX cpus specific settings
1460 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1461 UseStoreImmI16 = false; // don't use it on ZX cpus
1462 }
1463 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1464 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1465 // Use it on all ZX cpus
1466 UseAddressNop = true;
1467 }
1468 }
1469 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1470 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1471 }
1472 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1473 if (supports_sse3()) {
1474 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1475 } else {
1476 UseXmmRegToRegMoveAll = false;
1477 }
1478 }
1479 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1480 #ifdef COMPILER2
1481 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1482 // For new ZX cpus do the next optimization:
1483 // don't align the beginning of a loop if there are enough instructions
1484 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1485 // in current fetch line (OptoLoopAlignment) or the padding
1486 // is big (> MaxLoopPad).
1487 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1488 // generated NOP instructions. 11 is the largest size of one
1489 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1490 MaxLoopPad = 11;
1491 }
1492 #endif // COMPILER2
1493 if (supports_sse4_2()) { // new ZX cpus
1494 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1495 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1496 }
1497 }
1498 }
1499
1500 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1501 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1502 }
1503 }
1504
1505 if (is_amd_family()) { // AMD cpus specific settings
1506 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1507 // Use it on new AMD cpus starting from Opteron.
1508 UseAddressNop = true;
1509 }
1510 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1511 if (supports_sse4a()) {
1512 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1513 } else {
1514 UseXmmLoadAndClearUpper = false;
1515 }
1516 }
1517 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1518 if (supports_sse4a()) {
1519 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1520 } else {
1521 UseXmmRegToRegMoveAll = false;
1522 }
1523 }
1524 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1525 if (supports_sse4a()) {
1526 UseXmmI2F = true;
1527 } else {
1528 UseXmmI2F = false;
1529 }
1530 }
1531 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1532 if (supports_sse4a()) {
1533 UseXmmI2D = true;
1534 } else {
1535 UseXmmI2D = false;
1536 }
1537 }
1538
1539 // some defaults for AMD family 15h
1540 if (cpu_family() == 0x15) {
1541 // On family 15h processors default is no sw prefetch
1542 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1543 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1544 }
1545 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1546 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1547 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1548 }
1549 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1550 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1551 }
1552 }
1553
1554 #ifdef COMPILER2
1555 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1556 // Limit vectors size to 16 bytes on AMD cpus < 17h.
1557 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1558 }
1559 #endif // COMPILER2
1560
1561 // Some defaults for AMD family >= 17h && Hygon family 18h
1562 if (cpu_family() >= 0x17) {
1563 // On family >=17h processors use XMM and UnalignedLoadStores
1564 // for Array Copy
1565 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1566 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1567 }
1568 #ifdef COMPILER2
1569 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1570 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1571 }
1572 #endif
1573 }
1574 }
1575
1576 if (is_intel()) { // Intel cpus specific settings
1577 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1578 UseStoreImmI16 = false; // don't use it on Intel cpus
1579 }
1580 if (is_intel_server_family() || cpu_family() == 15) {
1581 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1582 // Use it on all Intel cpus starting from PentiumPro
1583 UseAddressNop = true;
1584 }
1585 }
1586 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1587 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1588 }
1589 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1590 if (supports_sse3()) {
1591 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1592 } else {
1593 UseXmmRegToRegMoveAll = false;
1594 }
1595 }
1596 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1597 #ifdef COMPILER2
1598 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1599 // For new Intel cpus do the next optimization:
1600 // don't align the beginning of a loop if there are enough instructions
1601 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1602 // in current fetch line (OptoLoopAlignment) or the padding
1603 // is big (> MaxLoopPad).
1604 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1605 // generated NOP instructions. 11 is the largest size of one
1606 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1607 MaxLoopPad = 11;
1608 }
1609 #endif // COMPILER2
1610
1611 if (is_intel_modern_cpu()) { // Newest Intel cpus
1612 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1613 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1614 }
1615 }
1616 }
1617 if (is_atom_family() || is_knights_family()) {
1618 #ifdef COMPILER2
1619 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1620 OptoScheduling = true;
1621 }
1622 #endif
1623 if (supports_sse4_2()) { // Silvermont
1624 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1625 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1626 }
1627 }
1628 if (FLAG_IS_DEFAULT(UseIncDec)) {
1629 FLAG_SET_DEFAULT(UseIncDec, false);
1630 }
1631 }
1632 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1633 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1634 }
1635 }
1636
1637 #ifdef COMPILER2
1638 if (UseAVX > 2) {
1639 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1640 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1641 ArrayOperationPartialInlineSize != 0 &&
1642 ArrayOperationPartialInlineSize != 16 &&
1643 ArrayOperationPartialInlineSize != 32 &&
1644 ArrayOperationPartialInlineSize != 64)) {
1645 int inline_size = 0;
1646 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1647 inline_size = 64;
1648 } else if (MaxVectorSize >= 32) {
1649 inline_size = 32;
1650 } else if (MaxVectorSize >= 16) {
1651 inline_size = 16;
1652 }
1653 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1654 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1655 }
1656 ArrayOperationPartialInlineSize = inline_size;
1657 }
1658
1659 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1660 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1661 if (ArrayOperationPartialInlineSize) {
1662 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1663 } else {
1664 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1665 }
1666 }
1667 }
1668
1669 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1670 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1671 OptimizeFill = false;
1672 }
1673 }
1674 #endif
1675 if (supports_sse4_2()) {
1676 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1677 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1678 }
1679 } else if (UseSSE42Intrinsics) {
1680 if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1681 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1682 }
1683 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1684 }
1685 if (UseSSE42Intrinsics) {
1686 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1687 UseVectorizedMismatchIntrinsic = true;
1688 }
1689 } else if (UseVectorizedMismatchIntrinsic) {
1690 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1691 warning("vectorizedMismatch intrinsics are not available on this CPU");
1692 }
1693 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1694 }
1695 if (UseAVX >= 2) {
1696 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1697 } else if (UseVectorizedHashCodeIntrinsic) {
1698 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1699 warning("vectorizedHashCode intrinsics are not available on this CPU");
1700 }
1701 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1702 }
1703
1704 // Use count trailing zeros instruction if available
1705 if (supports_bmi1()) {
1706 // tzcnt does not require VEX prefix
1707 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1708 UseCountTrailingZerosInstruction = true;
1709 }
1710 } else if (UseCountTrailingZerosInstruction) {
1711 if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1712 warning("tzcnt instruction is not available on this CPU");
1713 }
1714 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1715 }
1716
1717 // Use fast-string operations if available.
1718 if (supports_erms()) {
1719 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1720 UseFastStosb = true;
1721 }
1722 } else if (UseFastStosb) {
1723 if (!FLAG_IS_DEFAULT(UseFastStosb)) {
1724 warning("fast-string operations are not available on this CPU");
1725 }
1726 FLAG_SET_DEFAULT(UseFastStosb, false);
1727 }
1728
1729 // For AMD Processors use XMM/YMM MOVDQU instructions
1730 // for Object Initialization as default
1731 if (is_amd() && cpu_family() >= 0x19) {
1732 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1733 UseFastStosb = false;
1734 }
1735 }
1736
1737 #ifdef COMPILER2
1738 if (is_intel() && MaxVectorSize > 16) {
1739 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1740 UseFastStosb = false;
1741 }
1742 }
1743 #endif
1744
1745 // Use XMM/YMM MOVDQU instruction for Object Initialization
1746 if (UseUnalignedLoadStores) {
1747 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1748 UseXMMForObjInit = true;
1749 }
1750 } else if (UseXMMForObjInit) {
1751 if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1752 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1753 }
1754 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1755 }
1756
1757 #ifdef COMPILER2
1758 if (FLAG_IS_DEFAULT(AlignVector)) {
1759 // Modern processors allow misaligned memory operations for vectors.
1760 AlignVector = !UseUnalignedLoadStores;
1761 }
1762 #endif // COMPILER2
1763
1764 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1765 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1766 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1767 }
1768 }
1769
1770 // Allocation prefetch settings
1771 int cache_line_size = checked_cast<int>(prefetch_data_size());
1772 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1773 (cache_line_size > AllocatePrefetchStepSize)) {
1774 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1775 }
1776
1777 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1778 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1779 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1780 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1781 }
1782 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1783 }
1784
1785 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1786 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1787 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1788 }
1789
1790 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1791 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1792 is_intel_modern_cpu()) { // Nehalem based cpus
1793 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1794 }
1795 #ifdef COMPILER2
1796 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1797 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1798 }
1799 #endif
1800 }
1801
1802 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1803 #ifdef COMPILER2
1804 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1805 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1806 }
1807 #endif
1808 }
1809
1810 // Prefetch settings
1811
1812 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1813 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1814 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1815 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1816
1817 // gc copy/scan is disabled if prefetchw isn't supported, because
1818 // Prefetch::write emits an inlined prefetchw on Linux.
1819 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1820 // The used prefetcht0 instruction works for both amd64 and em64t.
1821
1822 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1823 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1824 }
1825 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1826 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1827 }
1828
1829 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1830 (cache_line_size > ContendedPaddingWidth))
1831 ContendedPaddingWidth = cache_line_size;
1832
1833 // This machine allows unaligned memory accesses
1834 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1835 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1836 }
1837
1838 #ifndef PRODUCT
1839 if (log_is_enabled(Info, os, cpu)) {
1840 LogStream ls(Log(os, cpu)::info());
1841 outputStream* log = &ls;
1842 log->print_cr("Logical CPUs per core: %u",
1843 logical_processors_per_package());
1844 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1845 log->print("UseSSE=%d", UseSSE);
1846 if (UseAVX > 0) {
1847 log->print(" UseAVX=%d", UseAVX);
1848 }
1849 if (UseAES) {
1850 log->print(" UseAES=1");
1851 }
1852 #ifdef COMPILER2
1853 if (MaxVectorSize > 0) {
1854 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1855 }
1856 #endif
1857 log->cr();
1858 log->print("Allocation");
1859 if (AllocatePrefetchStyle <= 0) {
1860 log->print_cr(": no prefetching");
1861 } else {
1862 log->print(" prefetching: ");
1863 if (AllocatePrefetchInstr == 0) {
1864 log->print("PREFETCHNTA");
1865 } else if (AllocatePrefetchInstr == 1) {
1866 log->print("PREFETCHT0");
1867 } else if (AllocatePrefetchInstr == 2) {
1868 log->print("PREFETCHT2");
1869 } else if (AllocatePrefetchInstr == 3) {
1870 log->print("PREFETCHW");
1871 }
1872 if (AllocatePrefetchLines > 1) {
1873 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1874 } else {
1875 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1876 }
1877 }
1878
1879 if (PrefetchCopyIntervalInBytes > 0) {
1880 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1881 }
1882 if (PrefetchScanIntervalInBytes > 0) {
1883 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1884 }
1885 if (ContendedPaddingWidth > 0) {
1886 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1887 }
1888 }
1889 #endif // !PRODUCT
1890 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1891 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1892 }
1893 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1894 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1895 }
1896 // CopyAVX3Threshold is the threshold at which 64-byte vector instructions
1897 // are used for implementing the array copy, fill and clear operations.
1898 // The Intel platforms that support the serialize instruction and the AMD
1899 // platforms with native 512-bit datapath have improved implementation of
1900 // 64-byte load/stores and so the default threshold is set to 0 for these
1901 // platforms.
1902 if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
1903 if (is_intel() && is_intel_server_family() && supports_serialize()) {
1904 FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1905 } else if (is_amd() && is_amd_avx512_datapath_server_family()) {
1906 FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1907 } else {
1908 FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
1909 }
1910 }
1911 }
1912
1913 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1914 VirtualizationType vrt = VM_Version::get_detected_virtualization();
1915 if (vrt == XenHVM) {
1916 st->print_cr("Xen hardware-assisted virtualization detected");
1917 } else if (vrt == KVM) {
1918 st->print_cr("KVM virtualization detected");
1919 } else if (vrt == VMWare) {
1920 st->print_cr("VMWare virtualization detected");
1921 VirtualizationSupport::print_virtualization_info(st);
1922 } else if (vrt == HyperV) {
1923 st->print_cr("Hyper-V virtualization detected");
1924 } else if (vrt == HyperVRole) {
1925 st->print_cr("Hyper-V role detected");
1926 }
1927 }
1928
1929 bool VM_Version::compute_has_intel_jcc_erratum() {
1930 if (!is_intel_family_core()) {
1931 // Only Intel CPUs are affected.
1932 return false;
1933 }
1934 // The following table of affected CPUs is based on the following document released by Intel:
1935 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1936 switch (_model) {
1937 case 0x8E:
1938 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1939 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1940 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1941 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1942 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1943 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1944 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1945 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1946 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1947 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1948 case 0x4E:
1949 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1950 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1951 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1952 return _stepping == 0x3;
1953 case 0x55:
1954 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1955 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1956 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1957 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1958 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1959 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1960 return _stepping == 0x4 || _stepping == 0x7;
1961 case 0x5E:
1962 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1963 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1964 return _stepping == 0x3;
1965 case 0x9E:
1966 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1967 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1968 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1969 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1970 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1971 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1972 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1973 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1974 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1975 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1976 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1977 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1978 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1979 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1980 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1981 case 0xA5:
1982 // Not in Intel documentation.
1983 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1984 return true;
1985 case 0xA6:
1986 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1987 return _stepping == 0x0;
1988 case 0xAE:
1989 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1990 return _stepping == 0xA;
1991 default:
1992 // If we are running on another intel machine not recognized in the table, we are okay.
1993 return false;
1994 }
1995 }
1996
1997 // On Xen, the cpuid instruction returns
1998 // eax / registers[0]: Version of Xen
1999 // ebx / registers[1]: chars 'XenV'
2000 // ecx / registers[2]: chars 'MMXe'
2001 // edx / registers[3]: chars 'nVMM'
2002 //
2003 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2004 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2005 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2006 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
2007 //
2008 // more information :
2009 // https://kb.vmware.com/s/article/1009458
2010 //
2011 void VM_Version::check_virtualizations() {
2012 uint32_t registers[4] = {0};
2013 char signature[13] = {0};
2014
2015 // Xen cpuid leaves can be found 0x100 aligned boundary starting
2016 // from 0x40000000 until 0x40010000.
2017 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2018 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2019 detect_virt_stub(leaf, registers);
2020 memcpy(signature, ®isters[1], 12);
2021
2022 if (strncmp("VMwareVMware", signature, 12) == 0) {
2023 Abstract_VM_Version::_detected_virtualization = VMWare;
2024 // check for extended metrics from guestlib
2025 VirtualizationSupport::initialize();
2026 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2027 Abstract_VM_Version::_detected_virtualization = HyperV;
2028 #ifdef _WINDOWS
2029 // CPUID leaf 0x40000007 is available to the root partition only.
2030 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2031 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2032 detect_virt_stub(0x40000007, registers);
2033 if ((registers[0] != 0x0) ||
2034 (registers[1] != 0x0) ||
2035 (registers[2] != 0x0) ||
2036 (registers[3] != 0x0)) {
2037 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2038 }
2039 #endif
2040 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2041 Abstract_VM_Version::_detected_virtualization = KVM;
2042 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2043 Abstract_VM_Version::_detected_virtualization = XenHVM;
2044 }
2045 }
2046 }
2047
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options:
// neither UseAVX nor MaxVectorSize was set explicitly, UseAVX is above 2,
// and the CPU is recognized by is_intel_cascade_lake().
bool VM_Version::is_default_intel_cascade_lake() {
  if (!(FLAG_IS_DEFAULT(UseAVX))) {
    return false;
  }
  if (!(FLAG_IS_DEFAULT(MaxVectorSize))) {
    return false;
  }
  if (UseAVX <= 2) {
    return false;
  }
  return is_intel_cascade_lake();
}
#endif
2057
2058 bool VM_Version::is_intel_cascade_lake() {
2059 return is_intel_skylake() && _stepping >= 5;
2060 }
2061
2062 bool VM_Version::is_intel_darkmont() {
2063 return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2064 }
2065
2066 void VM_Version::clear_apx_test_state() {
2067 clear_apx_test_state_stub();
2068 }
2069
2070 static bool _vm_version_initialized = false;
2071
2072 void VM_Version::initialize() {
2073 ResourceMark rm;
2074
2075 // Making this stub must be FIRST use of assembler
2076 stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2077 if (stub_blob == nullptr) {
2078 vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2079 }
2080 CodeBuffer c(stub_blob);
2081 VM_Version_StubGenerator g(&c);
2082
2083 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2084 g.generate_get_cpu_info());
2085 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2086 g.generate_detect_virt());
2087 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2088 g.clear_apx_test_state());
2089 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2090 g.generate_getCPUIDBrandString());
2091 get_processor_features();
2092
2093 Assembler::precompute_instructions();
2094
2095 if (VM_Version::supports_hv()) { // Supports hypervisor
2096 check_virtualizations();
2097 }
2098 _vm_version_initialized = true;
2099 }
2100
// CPU family identifiers that this file compares against the family value
// obtained from cpuid.
enum FamilyFlag {
  CPU_FAMILY_8086_8088  = 0x0,
  CPU_FAMILY_INTEL_286  = 0x2,
  CPU_FAMILY_INTEL_386  = 0x3,
  CPU_FAMILY_INTEL_486  = 0x4,
  CPU_FAMILY_PENTIUM    = 0x5,
  CPU_FAMILY_PENTIUMPRO = 0x6, // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
};
2110
// Bit masks tested against the ext_cpuid1_edx feature word.
enum _featureExtendedEdxFlag {
  RDTSCP_FLAG  = 1 << 27, // 0x08000000
  INTEL64_FLAG = 1 << 29  // 0x20000000
};
2115
// Bit masks for the std_cpuid1_edx feature word. The bit position of each
// flag is also the index of its description in _feature_edx_id.
// Bits 10 and 20 have no flag.
enum FeatureEdxFlag {
  FPU_FLAG     = 1 << 0,
  VME_FLAG     = 1 << 1,
  DE_FLAG      = 1 << 2,
  PSE_FLAG     = 1 << 3,
  TSC_FLAG     = 1 << 4,
  MSR_FLAG     = 1 << 5,
  PAE_FLAG     = 1 << 6,
  MCE_FLAG     = 1 << 7,
  CX8_FLAG     = 1 << 8,
  APIC_FLAG    = 1 << 9,
  SEP_FLAG     = 1 << 11,
  MTRR_FLAG    = 1 << 12,
  PGE_FLAG     = 1 << 13,
  MCA_FLAG     = 1 << 14,
  CMOV_FLAG    = 1 << 15,
  PAT_FLAG     = 1 << 16,
  PSE36_FLAG   = 1 << 17,
  PSNUM_FLAG   = 1 << 18,
  CLFLUSH_FLAG = 1 << 19,
  DTS_FLAG     = 1 << 21,
  ACPI_FLAG    = 1 << 22,
  MMX_FLAG     = 1 << 23,
  FXSR_FLAG    = 1 << 24,
  SSE_FLAG     = 1 << 25,
  SSE2_FLAG    = 1 << 26,
  SS_FLAG      = 1 << 27,
  HTT_FLAG     = 1 << 28,
  TM_FLAG      = 1 << 29
};
2146
// VM_Version statics

// Number of entries in the per-vendor family name tables
// (_family_id_intel and _family_id_amd respectively).
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};
2152
// Size of the vendor id buffer: three 32-bit vendor name words plus a NUL.
const size_t VENDOR_LENGTH = 12 + 1;
// Size of the extended brand string buffer: twelve 32-bit words
// (proc_name_0 .. proc_name_11) plus one extra byte.
const size_t CPU_EBS_MAX_LENGTH = 3 * 4 * 4 + 1;

// Cache filled on first use by cpu_brand_string().
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

// Estimates computed by resolve_cpu_information_details().
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2160
2161 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2162 "8086/8088",
2163 "",
2164 "286",
2165 "386",
2166 "486",
2167 "Pentium",
2168 "Pentium Pro", //or Pentium-M/Woodcrest depending on model
2169 "",
2170 "",
2171 "",
2172 "",
2173 "",
2174 "",
2175 "",
2176 "",
2177 "Pentium 4"
2178 };
2179
2180 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2181 "",
2182 "",
2183 "",
2184 "",
2185 "5x86",
2186 "K5/K6",
2187 "Athlon/AthlonXP",
2188 "",
2189 "",
2190 "",
2191 "",
2192 "",
2193 "",
2194 "",
2195 "",
2196 "Opteron/Athlon64",
2197 "Opteron QC/Phenom", // Barcelona et.al.
2198 "",
2199 "",
2200 "",
2201 "",
2202 "",
2203 "",
2204 "Zen"
2205 };
// Model names for CPU family 6, indexed by the extended cpu model and
// terminated by a nullptr sentinel (see cpu_model_description()).
// Models without a name are "".
//
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  "",                                        // 0x00
  "Pentium Pro",                             // 0x01
  "",                                        // 0x02
  "Pentium II model 3",                      // 0x03
  "",                                        // 0x04
  "Pentium II model 5/Xeon/Celeron",         // 0x05
  "Celeron",                                 // 0x06
  "Pentium III/Pentium III Xeon",            // 0x07
  "Pentium III/Pentium III Xeon",            // 0x08
  "Pentium M model 9",                       // 0x09 Yonah
  "Pentium III, model A",                    // 0x0A
  "Pentium III, model B",                    // 0x0B
  "",                                        // 0x0C
  "Pentium M model D",                       // 0x0D Dothan
  "",                                        // 0x0E
  "Core 2",                                  // 0x0F Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "", "", "", "", "", "",                    // 0x10 - 0x15
  "Celeron",                                 // 0x16 Celeron 65nm
  "Core 2",                                  // 0x17 Penryn / Harpertown
  "", "",                                    // 0x18 - 0x19
  "Core i7",                                 // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                                    // 0x1B Z5xx series Silverthorn
  "",                                        // 0x1C
  "Core 2",                                  // 0x1D Dunnington (6-core)
  "Nehalem",                                 // 0x1E CPU_MODEL_NEHALEM
  "",                                        // 0x1F
  "", "", "", "", "",                        // 0x20 - 0x24
  "Westmere",                                // 0x25 CPU_MODEL_WESTMERE
  "", "", "", "",                            // 0x26 - 0x29
  "Sandy Bridge",                            // 0x2A "2nd Generation Intel Core i7, i5, i3"
  "",                                        // 0x2B
  "Westmere-EP",                             // 0x2C CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",                         // 0x2D CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",                              // 0x2E CPU_MODEL_NEHALEM_EX
  "Westmere-EX",                             // 0x2F CPU_MODEL_WESTMERE_EX
  "", "", "", "", "", "", "", "", "", "",    // 0x30 - 0x39
  "Ivy Bridge",                              // 0x3A
  "",                                        // 0x3B
  "Haswell",                                 // 0x3C "4th Generation Intel Core Processor"
  "",                                        // 0x3D "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",                           // 0x3E "Next Generation Intel Xeon Processor E7 Family"
  "",                                        // 0x3F "Future Generation Intel Xeon Processor"
  "", "", "", "", "",                        // 0x40 - 0x44
  "Haswell",                                 // 0x45 "4th Generation Intel Core Processor"
  "Haswell",                                 // 0x46 "4th Generation Intel Core Processor"
  nullptr                                    // end of table
};
2282
/* Brand ID is for backward compatibility.
 * Newer CPUs use the extended brand string.
 * Indexed by the brand number (see cpu_brand()); nullptr terminates the table. */
const char* const _brand_id[] = {
  "",                                  // 0
  "Celeron processor",                 // 1
  "Pentium III processor",             // 2
  "Intel Pentium III Xeon processor",  // 3
  "", "", "", "",                      // 4 - 7
  "Intel Pentium 4 processor",         // 8
  nullptr                              // end of table
};
2297
2298
// Descriptions of the std_cpuid1_edx feature bits, indexed by bit position.
// Bits without a description are "".
const char* const _feature_edx_id[] = {
  /*  0 */ "On-Chip FPU",
  /*  1 */ "Virtual Mode Extensions",
  /*  2 */ "Debugging Extensions",
  /*  3 */ "Page Size Extensions",
  /*  4 */ "Time Stamp Counter",
  /*  5 */ "Model Specific Registers",
  /*  6 */ "Physical Address Extension",
  /*  7 */ "Machine Check Exceptions",
  /*  8 */ "CMPXCHG8B Instruction",
  /*  9 */ "On-Chip APIC",
  /* 10 */ "",
  /* 11 */ "Fast System Call",
  /* 12 */ "Memory Type Range Registers",
  /* 13 */ "Page Global Enable",
  /* 14 */ "Machine Check Architecture",
  /* 15 */ "Conditional Mov Instruction",
  /* 16 */ "Page Attribute Table",
  /* 17 */ "36-bit Page Size Extension",
  /* 18 */ "Processor Serial Number",
  /* 19 */ "CLFLUSH Instruction",
  /* 20 */ "",
  /* 21 */ "Debug Trace Store feature",
  /* 22 */ "ACPI registers in MSR space",
  /* 23 */ "Intel Architecture MMX Technology",
  /* 24 */ "Fast Float Point Save and Restore",
  /* 25 */ "Streaming SIMD extensions",
  /* 26 */ "Streaming SIMD extensions 2",
  /* 27 */ "Self-Snoop",
  /* 28 */ "Hyper Threading",
  /* 29 */ "Thermal Monitor",
  /* 30 */ "",
  /* 31 */ "Pending Break Enable"
};
2333
// Descriptions of the ext_cpuid1_edx feature bits, indexed by bit position.
// Bits without a description are "".
const char* const _feature_extended_edx_id[] = {
  "", "", "", "", "", "", "", "", "", "", "",  //  0 - 10
  "SYSCALL/SYSRET",                            // 11
  "", "", "", "", "", "", "", "",              // 12 - 19
  "Execute Disable Bit",                       // 20
  "", "", "", "", "", "",                      // 21 - 26
  "RDTSCP",                                    // 27
  "",                                          // 28
  "Intel 64 Architecture",                     // 29
  "", ""                                       // 30 - 31
};
2368
// Descriptions of the std_cpuid1_ecx feature bits, indexed by bit position.
// Bits without a description are "".
const char* const _feature_ecx_id[] = {
  /*  0 */ "Streaming SIMD Extensions 3",
  /*  1 */ "PCLMULQDQ",
  /*  2 */ "64-bit DS Area",
  /*  3 */ "MONITOR/MWAIT instructions",
  /*  4 */ "CPL Qualified Debug Store",
  /*  5 */ "Virtual Machine Extensions",
  /*  6 */ "Safer Mode Extensions",
  /*  7 */ "Enhanced Intel SpeedStep technology",
  /*  8 */ "Thermal Monitor 2",
  /*  9 */ "Supplemental Streaming SIMD Extensions 3",
  /* 10 */ "L1 Context ID",
  /* 11 */ "",
  /* 12 */ "Fused Multiply-Add",
  /* 13 */ "CMPXCHG16B",
  /* 14 */ "xTPR Update Control",
  /* 15 */ "Perfmon and Debug Capability",
  /* 16 */ "",
  /* 17 */ "Process-context identifiers",
  /* 18 */ "Direct Cache Access",
  /* 19 */ "Streaming SIMD extensions 4.1",
  /* 20 */ "Streaming SIMD extensions 4.2",
  /* 21 */ "x2APIC",
  /* 22 */ "MOVBE",
  /* 23 */ "Popcount instruction",
  /* 24 */ "TSC-Deadline",
  /* 25 */ "AESNI",
  /* 26 */ "XSAVE",
  /* 27 */ "OSXSAVE",
  /* 28 */ "AVX",
  /* 29 */ "F16C",
  /* 30 */ "RDRAND",
  /* 31 */ ""
};
2403
// Descriptions of the ext_cpuid1_ecx feature bits, indexed by bit position.
// Bits without a description are "".
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",            //  0
  "Core multi-processor legacy mode",         //  1
  "", "", "",                                 //  2 - 4
  "Advanced Bit Manipulations: LZCNT",        //  5
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",  //  6
  "Misaligned SSE mode",                      //  7
  "", "", "", "", "", "", "", "",             //  8 - 15
  "", "", "", "", "", "", "", "",             // 16 - 23
  "", "", "", "", "", "", "", ""              // 24 - 31
};
2438
2439 const char* VM_Version::cpu_model_description(void) {
2440 uint32_t cpu_family = extended_cpu_family();
2441 uint32_t cpu_model = extended_cpu_model();
2442 const char* model = nullptr;
2443
2444 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2445 for (uint32_t i = 0; i <= cpu_model; i++) {
2446 model = _model_id_pentium_pro[i];
2447 if (model == nullptr) {
2448 break;
2449 }
2450 }
2451 }
2452 return model;
2453 }
2454
2455 const char* VM_Version::cpu_brand_string(void) {
2456 if (_cpu_brand_string == nullptr) {
2457 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2458 if (nullptr == _cpu_brand_string) {
2459 return nullptr;
2460 }
2461 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2462 if (ret_val != OS_OK) {
2463 FREE_C_HEAP_ARRAY(_cpu_brand_string);
2464 _cpu_brand_string = nullptr;
2465 }
2466 }
2467 return _cpu_brand_string;
2468 }
2469
2470 const char* VM_Version::cpu_brand(void) {
2471 const char* brand = nullptr;
2472
2473 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2474 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2475 brand = _brand_id[0];
2476 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2477 brand = _brand_id[i];
2478 }
2479 }
2480 return brand;
2481 }
2482
2483 bool VM_Version::cpu_is_em64t(void) {
2484 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2485 }
2486
2487 bool VM_Version::is_netburst(void) {
2488 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2489 }
2490
2491 bool VM_Version::supports_tscinv_ext(void) {
2492 if (!supports_tscinv_bit()) {
2493 return false;
2494 }
2495
2496 if (is_intel()) {
2497 return true;
2498 }
2499
2500 if (is_amd()) {
2501 return !is_amd_Barcelona();
2502 }
2503
2504 if (is_hygon()) {
2505 return true;
2506 }
2507
2508 return false;
2509 }
2510
2511 void VM_Version::resolve_cpu_information_details(void) {
2512
2513 // in future we want to base this information on proper cpu
2514 // and cache topology enumeration such as:
2515 // Intel 64 Architecture Processor Topology Enumeration
2516 // which supports system cpu and cache topology enumeration
2517 // either using 2xAPICIDs or initial APICIDs
2518
2519 // currently only rough cpu information estimates
2520 // which will not necessarily reflect the exact configuration of the system
2521
2522 // this is the number of logical hardware threads
2523 // visible to the operating system
2524 _no_of_threads = os::processor_count();
2525
2526 // find out number of threads per cpu package
2527 int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2528 if (threads_per_package == 0) {
2529 // Fallback code to avoid div by zero in subsequent code.
2530 // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2531 threads_per_package = threads_per_core() * cores_per_cpu();
2532 }
2533
2534 // use amount of threads visible to the process in order to guess number of sockets
2535 _no_of_sockets = _no_of_threads / threads_per_package;
2536
2537 // process might only see a subset of the total number of threads
2538 // from a single processor package. Virtualization/resource management for example.
2539 // If so then just write a hard 1 as num of pkgs.
2540 if (0 == _no_of_sockets) {
2541 _no_of_sockets = 1;
2542 }
2543
2544 // estimate the number of cores
2545 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2546 }
2547
2548
2549 const char* VM_Version::cpu_family_description(void) {
2550 int cpu_family_id = extended_cpu_family();
2551 if (is_amd()) {
2552 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2553 return _family_id_amd[cpu_family_id];
2554 }
2555 }
2556 if (is_intel()) {
2557 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2558 return cpu_model_description();
2559 }
2560 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2561 return _family_id_intel[cpu_family_id];
2562 }
2563 }
2564 if (is_zx()) {
2565 int cpu_model_id = extended_cpu_model();
2566 if (cpu_family_id == 7) {
2567 switch (cpu_model_id) {
2568 case 0x1B:
2569 return "wudaokou";
2570 case 0x3B:
2571 return "lujiazui";
2572 case 0x5B:
2573 return "yongfeng";
2574 case 0x6B:
2575 return "shijidadao";
2576 }
2577 } else if (cpu_family_id == 6) {
2578 return "zhangjiang";
2579 }
2580 }
2581 if (is_hygon()) {
2582 return "Dhyana";
2583 }
2584 return "Unknown x86";
2585 }
2586
2587 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2588 assert(buf != nullptr, "buffer is null!");
2589 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2590
2591 const char* cpu_type = nullptr;
2592 const char* x64 = nullptr;
2593
2594 if (is_intel()) {
2595 cpu_type = "Intel";
2596 x64 = cpu_is_em64t() ? " Intel64" : "";
2597 } else if (is_amd()) {
2598 cpu_type = "AMD";
2599 x64 = cpu_is_em64t() ? " AMD64" : "";
2600 } else if (is_zx()) {
2601 cpu_type = "Zhaoxin";
2602 x64 = cpu_is_em64t() ? " x86_64" : "";
2603 } else if (is_hygon()) {
2604 cpu_type = "Hygon";
2605 x64 = cpu_is_em64t() ? " AMD64" : "";
2606 } else {
2607 cpu_type = "Unknown x86";
2608 x64 = cpu_is_em64t() ? " x86_64" : "";
2609 }
2610
2611 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2612 cpu_type,
2613 cpu_family_description(),
2614 supports_ht() ? " (HT)" : "",
2615 supports_sse3() ? " SSE3" : "",
2616 supports_ssse3() ? " SSSE3" : "",
2617 supports_sse4_1() ? " SSE4.1" : "",
2618 supports_sse4_2() ? " SSE4.2" : "",
2619 supports_sse4a() ? " SSE4A" : "",
2620 is_netburst() ? " Netburst" : "",
2621 is_intel_family_core() ? " Core" : "",
2622 x64);
2623
2624 return OS_OK;
2625 }
2626
2627 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2628 assert(buf != nullptr, "buffer is null!");
2629 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2630 assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2631
2632 // invoke newly generated asm code to fetch CPU Brand String
2633 getCPUIDBrandString_stub(&_cpuid_info);
2634
2635 // fetch results into buffer
2636 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0;
2637 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1;
2638 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2;
2639 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2640 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2641 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2642 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2643 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2644 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2645 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2646 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2647 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2648
2649 return OS_OK;
2650 }
2651
2652 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2653 guarantee(buf != nullptr, "buffer is null!");
2654 guarantee(buf_len > 0, "buffer len not enough!");
2655
2656 unsigned int flag = 0;
2657 unsigned int fi = 0;
2658 size_t written = 0;
2659 const char* prefix = "";
2660
2661 #define WRITE_TO_BUF(string) \
2662 { \
2663 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2664 if (res < 0) { \
2665 return buf_len - 1; \
2666 } \
2667 written += res; \
2668 if (prefix[0] == '\0') { \
2669 prefix = ", "; \
2670 } \
2671 }
2672
2673 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2674 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2675 continue; /* no hyperthreading */
2676 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2677 continue; /* no fast system call */
2678 }
2679 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2680 WRITE_TO_BUF(_feature_edx_id[fi]);
2681 }
2682 }
2683
2684 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2685 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2686 WRITE_TO_BUF(_feature_ecx_id[fi]);
2687 }
2688 }
2689
2690 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2691 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2692 WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2693 }
2694 }
2695
2696 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2697 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2698 WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2699 }
2700 }
2701
2702 if (supports_tscinv_bit()) {
2703 WRITE_TO_BUF("Invariant TSC");
2704 }
2705
2706 if (supports_hybrid()) {
2707 WRITE_TO_BUF("Hybrid Architecture");
2708 }
2709
2710 return written;
2711 }
2712
2713 /**
2714 * Write a detailed description of the cpu to a given buffer, including
2715 * feature set.
2716 */
2717 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2718 assert(buf != nullptr, "buffer is null!");
2719 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2720
2721 static const char* unknown = "<unknown>";
2722 char vendor_id[VENDOR_LENGTH];
2723 const char* family = nullptr;
2724 const char* model = nullptr;
2725 const char* brand = nullptr;
2726 int outputLen = 0;
2727
2728 family = cpu_family_description();
2729 if (family == nullptr) {
2730 family = unknown;
2731 }
2732
2733 model = cpu_model_description();
2734 if (model == nullptr) {
2735 model = unknown;
2736 }
2737
2738 brand = cpu_brand_string();
2739
2740 if (brand == nullptr) {
2741 brand = cpu_brand();
2742 if (brand == nullptr) {
2743 brand = unknown;
2744 }
2745 }
2746
2747 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2748 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2749 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2750 vendor_id[VENDOR_LENGTH-1] = '\0';
2751
2752 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2753 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2754 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2755 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2756 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2757 "Supports: ",
2758 brand,
2759 vendor_id,
2760 family,
2761 extended_cpu_family(),
2762 model,
2763 extended_cpu_model(),
2764 cpu_stepping(),
2765 _cpuid_info.std_cpuid1_eax.bits.ext_family,
2766 _cpuid_info.std_cpuid1_eax.bits.ext_model,
2767 _cpuid_info.std_cpuid1_eax.bits.proc_type,
2768 _cpuid_info.std_cpuid1_eax.value,
2769 _cpuid_info.std_cpuid1_ebx.value,
2770 _cpuid_info.std_cpuid1_ecx.value,
2771 _cpuid_info.std_cpuid1_edx.value,
2772 _cpuid_info.ext_cpuid1_eax,
2773 _cpuid_info.ext_cpuid1_ebx,
2774 _cpuid_info.ext_cpuid1_ecx,
2775 _cpuid_info.ext_cpuid1_edx);
2776
2777 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2778 if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2779 return OS_ERR;
2780 }
2781
2782 cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2783
2784 return OS_OK;
2785 }
2786
2787
// Fill in Abstract_VM_Version statics
//
// Resolves the CPU information details and then formats the CPU name and
// description into _cpu_name and _cpu_desc. Both asserts below document the
// expected call order: VM_Version must already be initialized, and this
// function must not have completed before.
// NOTE(review): whatever the two description helpers return is discarded
// here - confirm that a truncated or failed description is acceptable.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  // Mark completion last so the second assert above catches a repeated call.
  _initialized = true;
}
2799
2800 /**
2801 * For information about extracting the frequency from the cpu brand string, please see:
2802 *
2803 * Intel Processor Identification and the CPUID Instruction
2804 * Application Note 485
2805 * May 2012
2806 *
2807 * The return value is the frequency in Hz.
2808 */
2809 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2810 const char* const brand_string = cpu_brand_string();
2811 if (brand_string == nullptr) {
2812 return 0;
2813 }
2814 const int64_t MEGA = 1000000;
2815 int64_t multiplier = 0;
2816 int64_t frequency = 0;
2817 uint8_t idx = 0;
2818 // The brand string buffer is at most 48 bytes.
2819 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2820 for (; idx < 48-2; ++idx) {
2821 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2822 // Search brand string for "yHz" where y is M, G, or T.
2823 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2824 if (brand_string[idx] == 'M') {
2825 multiplier = MEGA;
2826 } else if (brand_string[idx] == 'G') {
2827 multiplier = MEGA * 1000;
2828 } else if (brand_string[idx] == 'T') {
2829 multiplier = MEGA * MEGA;
2830 }
2831 break;
2832 }
2833 }
2834 if (multiplier > 0) {
2835 // Compute frequency (in Hz) from brand string.
2836 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2837 frequency = (brand_string[idx-4] - '0') * multiplier;
2838 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2839 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2840 } else { // format is "xxxx"
2841 frequency = (brand_string[idx-4] - '0') * 1000;
2842 frequency += (brand_string[idx-3] - '0') * 100;
2843 frequency += (brand_string[idx-2] - '0') * 10;
2844 frequency += (brand_string[idx-1] - '0');
2845 frequency *= multiplier;
2846 }
2847 }
2848 return frequency;
2849 }
2850
2851
2852 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2853 if (_max_qualified_cpu_frequency == 0) {
2854 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2855 }
2856 return _max_qualified_cpu_frequency;
2857 }
2858
2859 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2860 VM_Features vm_features;
2861
2862 // check the features that must be present
2863 guarantee(std_cpuid1_edx.bits.sse2 != 0, "sse2 is not supported");
2864 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
2865 // clflush_size is size in quadwords (8 bytes).
2866 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == ICache::line_size/8, "clflush size is not supported");
2867
2868 // sse and sse2 are guaranteed to be present
2869 vm_features.set_feature(CPU_SSE);
2870 vm_features.set_feature(CPU_SSE2);
2871
2872 if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2873 vm_features.set_feature(CPU_CX8);
2874 if (std_cpuid1_edx.bits.cmov != 0)
2875 vm_features.set_feature(CPU_CMOV);
2876 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2877 ext_cpuid1_edx.bits.fxsr != 0))
2878 vm_features.set_feature(CPU_FXSR);
2879 // HT flag is set for multi-core processors also.
2880 if (threads_per_core() > 1)
2881 vm_features.set_feature(CPU_HT);
2882 if (std_cpuid1_ecx.bits.sse3 != 0)
2883 vm_features.set_feature(CPU_SSE3);
2884 if (std_cpuid1_ecx.bits.ssse3 != 0)
2885 vm_features.set_feature(CPU_SSSE3);
2886 if (std_cpuid1_ecx.bits.sse4_1 != 0)
2887 vm_features.set_feature(CPU_SSE4_1);
2888 if (std_cpuid1_ecx.bits.sse4_2 != 0)
2889 vm_features.set_feature(CPU_SSE4_2);
2890 if (std_cpuid1_ecx.bits.popcnt != 0)
2891 vm_features.set_feature(CPU_POPCNT);
2892 if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2893 xem_xcr0_eax.bits.apx_f != 0 &&
2894 std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2895 vm_features.set_feature(CPU_APX_F);
2896 }
2897 if (std_cpuid1_ecx.bits.avx != 0 &&
2898 std_cpuid1_ecx.bits.osxsave != 0 &&
2899 xem_xcr0_eax.bits.sse != 0 &&
2900 xem_xcr0_eax.bits.ymm != 0) {
2901 vm_features.set_feature(CPU_AVX);
2902 vm_features.set_feature(CPU_VZEROUPPER);
2903 if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2904 vm_features.set_feature(CPU_SHA512);
2905 if (std_cpuid1_ecx.bits.f16c != 0)
2906 vm_features.set_feature(CPU_F16C);
2907 if (sef_cpuid7_ebx.bits.avx2 != 0) {
2908 vm_features.set_feature(CPU_AVX2);
2909 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2910 vm_features.set_feature(CPU_AVX_IFMA);
2911 }
2912 if (sef_cpuid7_ecx.bits.gfni != 0)
2913 vm_features.set_feature(CPU_GFNI);
2914 if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2915 xem_xcr0_eax.bits.opmask != 0 &&
2916 xem_xcr0_eax.bits.zmm512 != 0 &&
2917 xem_xcr0_eax.bits.zmm32 != 0) {
2918 vm_features.set_feature(CPU_AVX512F);
2919 if (sef_cpuid7_ebx.bits.avx512cd != 0)
2920 vm_features.set_feature(CPU_AVX512CD);
2921 if (sef_cpuid7_ebx.bits.avx512dq != 0)
2922 vm_features.set_feature(CPU_AVX512DQ);
2923 if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2924 vm_features.set_feature(CPU_AVX512_IFMA);
2925 if (sef_cpuid7_ebx.bits.avx512pf != 0)
2926 vm_features.set_feature(CPU_AVX512PF);
2927 if (sef_cpuid7_ebx.bits.avx512er != 0)
2928 vm_features.set_feature(CPU_AVX512ER);
2929 if (sef_cpuid7_ebx.bits.avx512bw != 0)
2930 vm_features.set_feature(CPU_AVX512BW);
2931 if (sef_cpuid7_ebx.bits.avx512vl != 0)
2932 vm_features.set_feature(CPU_AVX512VL);
2933 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2934 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2935 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2936 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2937 if (sef_cpuid7_ecx.bits.vaes != 0)
2938 vm_features.set_feature(CPU_AVX512_VAES);
2939 if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2940 vm_features.set_feature(CPU_AVX512_VNNI);
2941 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2942 vm_features.set_feature(CPU_AVX512_BITALG);
2943 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2944 vm_features.set_feature(CPU_AVX512_VBMI);
2945 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2946 vm_features.set_feature(CPU_AVX512_VBMI2);
2947 }
2948 if (is_intel()) {
2949 if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2950 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2951 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2952 xem_xcr0_eax.bits.opmask != 0 &&
2953 xem_xcr0_eax.bits.zmm512 != 0 &&
2954 xem_xcr0_eax.bits.zmm32 != 0) {
2955 vm_features.set_feature(CPU_AVX10_1);
2956 vm_features.set_feature(CPU_AVX512F);
2957 vm_features.set_feature(CPU_AVX512CD);
2958 vm_features.set_feature(CPU_AVX512DQ);
2959 vm_features.set_feature(CPU_AVX512PF);
2960 vm_features.set_feature(CPU_AVX512ER);
2961 vm_features.set_feature(CPU_AVX512BW);
2962 vm_features.set_feature(CPU_AVX512VL);
2963 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2964 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2965 vm_features.set_feature(CPU_AVX512_VAES);
2966 vm_features.set_feature(CPU_AVX512_VNNI);
2967 vm_features.set_feature(CPU_AVX512_BITALG);
2968 vm_features.set_feature(CPU_AVX512_VBMI);
2969 vm_features.set_feature(CPU_AVX512_VBMI2);
2970 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2971 vm_features.set_feature(CPU_AVX10_2);
2972 }
2973 }
2974 }
2975 }
2976
2977 if (std_cpuid1_ecx.bits.hv != 0)
2978 vm_features.set_feature(CPU_HV);
2979 if (sef_cpuid7_ebx.bits.bmi1 != 0)
2980 vm_features.set_feature(CPU_BMI1);
2981 if (std_cpuid1_edx.bits.tsc != 0)
2982 vm_features.set_feature(CPU_TSC);
2983 if (ext_cpuid7_edx.bits.tsc_invariance != 0)
2984 vm_features.set_feature(CPU_TSCINV_BIT);
2985 if (std_cpuid1_ecx.bits.aes != 0)
2986 vm_features.set_feature(CPU_AES);
2987 if (ext_cpuid1_ecx.bits.lzcnt != 0)
2988 vm_features.set_feature(CPU_LZCNT);
2989 if (ext_cpuid1_ecx.bits.prefetchw != 0)
2990 vm_features.set_feature(CPU_3DNOW_PREFETCH);
2991 if (sef_cpuid7_ebx.bits.erms != 0)
2992 vm_features.set_feature(CPU_ERMS);
2993 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2994 vm_features.set_feature(CPU_FSRM);
2995 if (std_cpuid1_ecx.bits.clmul != 0)
2996 vm_features.set_feature(CPU_CLMUL);
2997 if (sef_cpuid7_ebx.bits.rtm != 0)
2998 vm_features.set_feature(CPU_RTM);
2999 if (sef_cpuid7_ebx.bits.adx != 0)
3000 vm_features.set_feature(CPU_ADX);
3001 if (sef_cpuid7_ebx.bits.bmi2 != 0)
3002 vm_features.set_feature(CPU_BMI2);
3003 if (sef_cpuid7_ebx.bits.sha != 0)
3004 vm_features.set_feature(CPU_SHA);
3005 if (std_cpuid1_ecx.bits.fma != 0)
3006 vm_features.set_feature(CPU_FMA);
3007 if (sef_cpuid7_ebx.bits.clflushopt != 0)
3008 vm_features.set_feature(CPU_FLUSHOPT);
3009 if (sef_cpuid7_ebx.bits.clwb != 0)
3010 vm_features.set_feature(CPU_CLWB);
3011 if (ext_cpuid1_edx.bits.rdtscp != 0)
3012 vm_features.set_feature(CPU_RDTSCP);
3013 if (sef_cpuid7_ecx.bits.rdpid != 0)
3014 vm_features.set_feature(CPU_RDPID);
3015
3016 // AMD|Hygon additional features.
3017 if (is_amd_family()) {
3018 // PREFETCHW was checked above, check TDNOW here.
3019 if ((ext_cpuid1_edx.bits.tdnow != 0))
3020 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3021 if (ext_cpuid1_ecx.bits.sse4a != 0)
3022 vm_features.set_feature(CPU_SSE4A);
3023 }
3024
3025 // Intel additional features.
3026 if (is_intel()) {
3027 if (sef_cpuid7_edx.bits.serialize != 0)
3028 vm_features.set_feature(CPU_SERIALIZE);
3029 if (sef_cpuid7_edx.bits.hybrid != 0)
3030 vm_features.set_feature(CPU_HYBRID);
3031 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3032 vm_features.set_feature(CPU_AVX512_FP16);
3033 }
3034
3035 // ZX additional features.
3036 if (is_zx()) {
3037 // We do not know if these are supported by ZX, so we cannot trust
3038 // common CPUID bit for them.
3039 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3040 vm_features.clear_feature(CPU_CLWB);
3041 }
3042
3043 // Protection key features.
3044 if (sef_cpuid7_ecx.bits.pku != 0) {
3045 vm_features.set_feature(CPU_PKU);
3046 }
3047 if (sef_cpuid7_ecx.bits.ospke != 0) {
3048 vm_features.set_feature(CPU_OSPKE);
3049 }
3050
3051 // Control flow enforcement (CET) features.
3052 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3053 vm_features.set_feature(CPU_CET_SS);
3054 }
3055 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3056 vm_features.set_feature(CPU_CET_IBT);
3057 }
3058
3059 // Composite features.
3060 if (supports_tscinv_bit() &&
3061 ((is_amd_family() && !is_amd_Barcelona()) ||
3062 is_intel_tsc_synched_at_init())) {
3063 vm_features.set_feature(CPU_TSCINV);
3064 }
3065 return vm_features;
3066 }
3067
3068 bool VM_Version::os_supports_avx_vectors() {
3069 bool retVal = false;
3070 int nreg = 4;
3071 if (supports_evex()) {
3072 // Verify that OS save/restore all bits of EVEX registers
3073 // during signal processing.
3074 retVal = true;
3075 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3076 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3077 retVal = false;
3078 break;
3079 }
3080 }
3081 } else if (supports_avx()) {
3082 // Verify that OS save/restore all bits of AVX registers
3083 // during signal processing.
3084 retVal = true;
3085 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3086 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3087 retVal = false;
3088 break;
3089 }
3090 }
3091 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3092 if (retVal == false) {
3093 // Verify that OS save/restore all bits of EVEX registers
3094 // during signal processing.
3095 retVal = true;
3096 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3097 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3098 retVal = false;
3099 break;
3100 }
3101 }
3102 }
3103 }
3104 return retVal;
3105 }
3106
3107 bool VM_Version::os_supports_apx_egprs() {
3108 if (!supports_apx_f()) {
3109 return false;
3110 }
3111 if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3112 _cpuid_info.apx_save[1] != egpr_test_value()) {
3113 return false;
3114 }
3115 return true;
3116 }
3117
3118 uint VM_Version::cores_per_cpu() {
3119 uint result = 1;
3120 if (is_intel()) {
3121 bool supports_topology = supports_processor_topology();
3122 if (supports_topology) {
3123 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3124 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3125 }
3126 if (!supports_topology || result == 0) {
3127 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3128 }
3129 } else if (is_amd_family()) {
3130 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3131 if (cpu_family() >= 0x17) { // Zen or later
3132 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3133 }
3134 } else if (is_zx()) {
3135 bool supports_topology = supports_processor_topology();
3136 if (supports_topology) {
3137 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3138 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3139 }
3140 if (!supports_topology || result == 0) {
3141 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3142 }
3143 }
3144 return result;
3145 }
3146
3147 uint VM_Version::threads_per_core() {
3148 uint result = 1;
3149 if (is_intel() && supports_processor_topology()) {
3150 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3151 } else if (is_zx() && supports_processor_topology()) {
3152 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3153 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3154 if (cpu_family() >= 0x17) {
3155 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3156 } else {
3157 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3158 cores_per_cpu();
3159 }
3160 }
3161 return (result == 0 ? 1 : result);
3162 }
3163
3164 uint VM_Version::L1_line_size() {
3165 uint result = 0;
3166 if (is_intel()) {
3167 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3168 } else if (is_amd_family()) {
3169 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3170 } else if (is_zx()) {
3171 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3172 }
3173 if (result < 32) // not defined ?
3174 result = 32; // 32 bytes by default on x86 and other x64
3175 return result;
3176 }
3177
3178 bool VM_Version::is_intel_tsc_synched_at_init() {
3179 if (is_intel_family_core()) {
3180 uint32_t ext_model = extended_cpu_model();
3181 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3182 ext_model == CPU_MODEL_WESTMERE_EP ||
3183 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3184 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3185 // <= 2-socket invariant tsc support. EX versions are usually used
3186 // in > 2-socket systems and likely don't synchronize tscs at
3187 // initialization.
3188 // Code that uses tsc values must be prepared for them to arbitrarily
3189 // jump forward or backward.
3190 return true;
3191 }
3192 }
3193 return false;
3194 }
3195
3196 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3197 // Hardware prefetching (distance/size in bytes):
3198 // Pentium 3 - 64 / 32
3199 // Pentium 4 - 256 / 128
3200 // Athlon - 64 / 32 ????
3201 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3202 // Core - 128 / 64
3203 //
3204 // Software prefetching (distance in bytes / instruction with best score):
3205 // Pentium 3 - 128 / prefetchnta
3206 // Pentium 4 - 512 / prefetchnta
3207 // Athlon - 128 / prefetchnta
3208 // Opteron - 256 / prefetchnta
3209 // Core - 256 / prefetchnta
3210 // It will be used only when AllocatePrefetchStyle > 0
3211
3212 if (is_amd_family()) { // AMD | Hygon
3213 return 256; // Opteron
3214 } else if (is_zx()) {
3215 return 256;
3216 } else { // Intel
3217 if (supports_sse3() && is_intel_server_family()) {
3218 if (is_intel_modern_cpu()) { // Nehalem based cpus
3219 return 192;
3220 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3221 return 384;
3222 }
3223 }
3224 if (is_intel_server_family()) {
3225 return 256; // Pentium M, Core, Core2
3226 } else {
3227 return 512; // Pentium 4
3228 }
3229 }
3230 }
3231
3232 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3233 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3234 switch (id) {
3235 case vmIntrinsics::_floatToFloat16:
3236 case vmIntrinsics::_float16ToFloat:
3237 if (!supports_float16()) {
3238 return false;
3239 }
3240 break;
3241 default:
3242 break;
3243 }
3244 return true;
3245 }
3246
3247 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3248 int i = 0;
3249 ss.join([&]() {
3250 const char* str = nullptr;
3251 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3252 if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3253 str = _features_names[i];
3254 }
3255 i += 1;
3256 }
3257 return str;
3258 }, ", ");
3259 }
3260
3261 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3262 VM_Features* features = (VM_Features*)features_buffer;
3263 insert_features_names(*features, ss);
3264 }
3265
3266 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3267 VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3268 VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3269 int i = 0;
3270 ss.join([&]() {
3271 const char* str = nullptr;
3272 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3273 Feature_Flag flag = (Feature_Flag)i;
3274 if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3275 str = _features_names[i];
3276 }
3277 i += 1;
3278 }
3279 return str;
3280 }, ", ");
3281 }
3282
3283 int VM_Version::cpu_features_size() {
3284 return sizeof(VM_Features);
3285 }
3286
3287 void VM_Version::store_cpu_features(void* buf) {
3288 VM_Features copy = _features.aot_code_cache_features();
3289 memcpy(buf, ©, sizeof(VM_Features));
3290 }
3291
3292 bool VM_Version::verify_aot_code_cache_features(void* features_buffer) {
3293 VM_Features* features_to_test = (VM_Features*)features_buffer;
3294 VM_Features rt_features = _features.aot_code_cache_features();
3295 return rt_features.verify_aot_code_cache_features(features_to_test);
3296 }