1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/codeBlob.hpp"
29 #include "compiler/compilerDefinitions.inline.hpp"
30 #include "jvm.h"
31 #include "logging/log.hpp"
32 #include "logging/logStream.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "runtime/globals_extension.hpp"
36 #include "runtime/icache.hpp"
37 #include "runtime/java.hpp"
38 #include "runtime/os.inline.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/vm_version.hpp"
41 #include "utilities/checkedCast.hpp"
42 #include "utilities/ostream.hpp"
43 #include "utilities/powerOfTwo.hpp"
44 #include "utilities/virtualizationSupport.hpp"
45
// Storage for VM_Version's static CPU identification state.
// _cpu, _model and _stepping are assigned in get_processor_features() from
// extended_cpu_family(), extended_cpu_model() and cpu_stepping().
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
// NOTE(review): not assigned anywhere in the visible part of this file;
// presumably set once the CPU model is known - confirm against the rest of the file.
bool VM_Version::_has_intel_jcc_erratum;
// Raw CPUID/XCR0 data; filled in by get_cpu_info_stub(&_cpuid_info).
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
51
// Table of feature names: CPU_FEATURE_FLAGS applies the helper macro to every
// (id, name) entry, and each application contributes one XSTR(name) element.
// The helper macro is local to this table and is undefined right after it.
#define DECLARE_CPU_FEATURE_NAME(id, name) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME
55
// Code addresses inside get_cpu_info_stub that bracket the deliberate faulting
// loads. All four are nullptr until generate_get_cpu_info() records them through
// the VM_Version::set_cpuinfo_*() setters.

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
64
// NOTE(review): stub_blob and stub_size are not used in the visible part of this
// file; presumably the blob that holds the stubs generated below and its size in
// bytes - confirm at the allocation site. Any growth of the generated stubs must
// still fit in stub_size.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// _features is the working feature set (get_processor_features() clears entries
// from it according to VM flags); _cpu_features keeps the copy taken right after
// detection, before any such adjustment.
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;
70
// Entry-point signatures of the generated stubs. They are declared with C
// linkage because the stubs are raw machine code invoked through these pointers.
extern "C" {
  // Receives the address of the CpuidInfo structure to fill.
  using get_cpu_info_stub_t = void (*)(void*);
  // Receives a CPUID leaf and a four-element array for eax, ebx, ecx, edx.
  using detect_virt_stub_t = void (*)(uint32_t, uint32_t*);
  // Takes no arguments; zeroes the APX test registers.
  using clear_apx_test_state_t = void (*)(void);
  // Receives the address of the CpuidInfo structure to fill with the brand string.
  using getCPUIDBrandString_stub_t = void (*)(void*);
}

// Stub entry points; all stay nullptr until the corresponding stub is generated.
static get_cpu_info_stub_t        get_cpu_info_stub         = nullptr;
static detect_virt_stub_t         detect_virt_stub          = nullptr;
static clear_apx_test_state_t     clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub  = nullptr;
81
// CPUID leaf numbers (the value loaded into EAX before executing cpuid).

// Standard leaves.
#define CPUID_STANDARD_FN 0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves. 0x80000000 is queried first to learn the highest supported
// extended leaf; 0x80000002..0x80000004 return the processor brand string,
// 16 bytes per leaf (see generate_getCPUIDBrandString).
#define CPUID_EXTENDED_FN 0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
94
95 class VM_Version_StubGenerator: public StubCodeGenerator {
96 public:
97
// Hands the code buffer to the StubCodeGenerator base class; the generator
// itself keeps no additional state.
VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
99
100 address clear_apx_test_state() {
101 # define __ _masm->
102 address start = __ pc();
103 // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
104 // handling guarantees that preserved register values post signal handling were
105 // re-instantiated by operating system and not because they were not modified externally.
106
107 bool save_apx = UseAPX;
108 VM_Version::set_apx_cpuFeatures();
109 UseAPX = true;
110 // EGPR state save/restoration.
111 __ mov64(r16, 0L);
112 __ mov64(r31, 0L);
113 UseAPX = save_apx;
114 VM_Version::clean_cpuFeatures();
115 __ ret(0);
116 return start;
117 }
118
119 address generate_get_cpu_info() {
120 // Flags to test CPU type.
121 const uint32_t HS_EFL_AC = 0x40000;
122 const uint32_t HS_EFL_ID = 0x200000;
123 // Values for when we don't have a CPUID instruction.
124 const int CPU_FAMILY_SHIFT = 8;
125 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
126 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
127 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
128
129 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
130 Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
131 Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
132 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
133
134 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
135 # define __ _masm->
136
137 address start = __ pc();
138
139 //
140 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
141 //
142 // rcx and rdx are first and second argument registers on windows
143
144 __ push(rbp);
145 __ mov(rbp, c_rarg0); // cpuid_info address
146 __ push(rbx);
147 __ push(rsi);
148 __ pushf(); // preserve rbx, and flags
149 __ pop(rax);
150 __ push(rax);
151 __ mov(rcx, rax);
152 //
153 // if we are unable to change the AC flag, we have a 386
154 //
155 __ xorl(rax, HS_EFL_AC);
156 __ push(rax);
157 __ popf();
158 __ pushf();
159 __ pop(rax);
160 __ cmpptr(rax, rcx);
161 __ jccb(Assembler::notEqual, detect_486);
162
163 __ movl(rax, CPU_FAMILY_386);
164 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
165 __ jmp(done);
166
167 //
168 // If we are unable to change the ID flag, we have a 486 which does
169 // not support the "cpuid" instruction.
170 //
171 __ bind(detect_486);
172 __ mov(rax, rcx);
173 __ xorl(rax, HS_EFL_ID);
174 __ push(rax);
175 __ popf();
176 __ pushf();
177 __ pop(rax);
178 __ cmpptr(rcx, rax);
179 __ jccb(Assembler::notEqual, detect_586);
180
181 __ bind(cpu486);
182 __ movl(rax, CPU_FAMILY_486);
183 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
184 __ jmp(done);
185
186 //
187 // At this point, we have a chip which supports the "cpuid" instruction
188 //
189 __ bind(detect_586);
190 __ xorl(rax, rax);
191 __ cpuid();
192 __ orl(rax, rax);
193 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
194 // value of at least 1, we give up and
195 // assume a 486
196 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
197 __ movl(Address(rsi, 0), rax);
198 __ movl(Address(rsi, 4), rbx);
199 __ movl(Address(rsi, 8), rcx);
200 __ movl(Address(rsi,12), rdx);
201
202 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
203 __ jccb(Assembler::belowEqual, std_cpuid4);
204
205 //
206 // cpuid(0xB) Processor Topology
207 //
208 __ movl(rax, 0xb);
209 __ xorl(rcx, rcx); // Threads level
210 __ cpuid();
211
212 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
213 __ movl(Address(rsi, 0), rax);
214 __ movl(Address(rsi, 4), rbx);
215 __ movl(Address(rsi, 8), rcx);
216 __ movl(Address(rsi,12), rdx);
217
218 __ movl(rax, 0xb);
219 __ movl(rcx, 1); // Cores level
220 __ cpuid();
221 __ push(rax);
222 __ andl(rax, 0x1f); // Determine if valid topology level
223 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
224 __ andl(rax, 0xffff);
225 __ pop(rax);
226 __ jccb(Assembler::equal, std_cpuid4);
227
228 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
229 __ movl(Address(rsi, 0), rax);
230 __ movl(Address(rsi, 4), rbx);
231 __ movl(Address(rsi, 8), rcx);
232 __ movl(Address(rsi,12), rdx);
233
234 __ movl(rax, 0xb);
235 __ movl(rcx, 2); // Packages level
236 __ cpuid();
237 __ push(rax);
238 __ andl(rax, 0x1f); // Determine if valid topology level
239 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
240 __ andl(rax, 0xffff);
241 __ pop(rax);
242 __ jccb(Assembler::equal, std_cpuid4);
243
244 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
245 __ movl(Address(rsi, 0), rax);
246 __ movl(Address(rsi, 4), rbx);
247 __ movl(Address(rsi, 8), rcx);
248 __ movl(Address(rsi,12), rdx);
249
250 //
251 // cpuid(0x4) Deterministic cache params
252 //
253 __ bind(std_cpuid4);
254 __ movl(rax, 4);
255 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
256 __ jccb(Assembler::greater, std_cpuid1);
257
258 __ xorl(rcx, rcx); // L1 cache
259 __ cpuid();
260 __ push(rax);
261 __ andl(rax, 0x1f); // Determine if valid cache parameters used
262 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache
263 __ pop(rax);
264 __ jccb(Assembler::equal, std_cpuid1);
265
266 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
267 __ movl(Address(rsi, 0), rax);
268 __ movl(Address(rsi, 4), rbx);
269 __ movl(Address(rsi, 8), rcx);
270 __ movl(Address(rsi,12), rdx);
271
272 //
273 // Standard cpuid(0x1)
274 //
275 __ bind(std_cpuid1);
276 __ movl(rax, 1);
277 __ cpuid();
278 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
279 __ movl(Address(rsi, 0), rax);
280 __ movl(Address(rsi, 4), rbx);
281 __ movl(Address(rsi, 8), rcx);
282 __ movl(Address(rsi,12), rdx);
283
284 //
285 // Check if OS has enabled XGETBV instruction to access XCR0
286 // (OSXSAVE feature flag) and CPU supports AVX
287 //
288 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
289 __ cmpl(rcx, 0x18000000);
290 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
291
292 //
293 // XCR0, XFEATURE_ENABLED_MASK register
294 //
295 __ xorl(rcx, rcx); // zero for XCR0 register
296 __ xgetbv();
297 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
298 __ movl(Address(rsi, 0), rax);
299 __ movl(Address(rsi, 4), rdx);
300
301 //
302 // cpuid(0x7) Structured Extended Features Enumeration Leaf.
303 //
304 __ bind(sef_cpuid);
305 __ movl(rax, 7);
306 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
307 __ jccb(Assembler::greater, ext_cpuid);
308 // ECX = 0
309 __ xorl(rcx, rcx);
310 __ cpuid();
311 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
312 __ movl(Address(rsi, 0), rax);
313 __ movl(Address(rsi, 4), rbx);
314 __ movl(Address(rsi, 8), rcx);
315 __ movl(Address(rsi, 12), rdx);
316
317 //
318 // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
319 //
320 __ bind(sefsl1_cpuid);
321 __ movl(rax, 7);
322 __ movl(rcx, 1);
323 __ cpuid();
324 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
325 __ movl(Address(rsi, 0), rax);
326 __ movl(Address(rsi, 4), rdx);
327
328 //
329 // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
330 //
331 __ bind(std_cpuid29);
332 __ movl(rax, 0x29);
333 __ movl(rcx, 0);
334 __ cpuid();
335 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
336 __ movl(Address(rsi, 0), rbx);
337
338 //
339 // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
340 //
341 __ bind(std_cpuid24);
342 __ movl(rax, 0x24);
343 __ movl(rcx, 0);
344 __ cpuid();
345 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
346 __ movl(Address(rsi, 0), rax);
347 __ movl(Address(rsi, 4), rbx);
348
349 //
350 // Extended cpuid(0x80000000)
351 //
352 __ bind(ext_cpuid);
353 __ movl(rax, 0x80000000);
354 __ cpuid();
355 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
356 __ jcc(Assembler::belowEqual, done);
357 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
358 __ jcc(Assembler::belowEqual, ext_cpuid1);
359 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
360 __ jccb(Assembler::belowEqual, ext_cpuid5);
361 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
362 __ jccb(Assembler::belowEqual, ext_cpuid7);
363 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
364 __ jccb(Assembler::belowEqual, ext_cpuid8);
365 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
366 __ jccb(Assembler::below, ext_cpuid8);
367 //
368 // Extended cpuid(0x8000001E)
369 //
370 __ movl(rax, 0x8000001E);
371 __ cpuid();
372 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
373 __ movl(Address(rsi, 0), rax);
374 __ movl(Address(rsi, 4), rbx);
375 __ movl(Address(rsi, 8), rcx);
376 __ movl(Address(rsi,12), rdx);
377
378 //
379 // Extended cpuid(0x80000008)
380 //
381 __ bind(ext_cpuid8);
382 __ movl(rax, 0x80000008);
383 __ cpuid();
384 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
385 __ movl(Address(rsi, 0), rax);
386 __ movl(Address(rsi, 4), rbx);
387 __ movl(Address(rsi, 8), rcx);
388 __ movl(Address(rsi,12), rdx);
389
390 //
391 // Extended cpuid(0x80000007)
392 //
393 __ bind(ext_cpuid7);
394 __ movl(rax, 0x80000007);
395 __ cpuid();
396 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
397 __ movl(Address(rsi, 0), rax);
398 __ movl(Address(rsi, 4), rbx);
399 __ movl(Address(rsi, 8), rcx);
400 __ movl(Address(rsi,12), rdx);
401
402 //
403 // Extended cpuid(0x80000005)
404 //
405 __ bind(ext_cpuid5);
406 __ movl(rax, 0x80000005);
407 __ cpuid();
408 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
409 __ movl(Address(rsi, 0), rax);
410 __ movl(Address(rsi, 4), rbx);
411 __ movl(Address(rsi, 8), rcx);
412 __ movl(Address(rsi,12), rdx);
413
414 //
415 // Extended cpuid(0x80000001)
416 //
417 __ bind(ext_cpuid1);
418 __ movl(rax, 0x80000001);
419 __ cpuid();
420 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
421 __ movl(Address(rsi, 0), rax);
422 __ movl(Address(rsi, 4), rbx);
423 __ movl(Address(rsi, 8), rcx);
424 __ movl(Address(rsi,12), rdx);
425
426 //
427 // Check if OS has enabled XGETBV instruction to access XCR0
428 // (OSXSAVE feature flag) and CPU supports APX
429 //
430 // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
431 // and XCRO[19] bit for OS support to save/restore extended GPR state.
432 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
433 __ movl(rax, 0x200000);
434 __ andl(rax, Address(rsi, 4));
435 __ jcc(Assembler::equal, vector_save_restore);
436 // check _cpuid_info.xem_xcr0_eax.bits.apx_f
437 __ movl(rax, 0x80000);
438 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
439 __ jcc(Assembler::equal, vector_save_restore);
440
441 bool save_apx = UseAPX;
442 VM_Version::set_apx_cpuFeatures();
443 UseAPX = true;
444 __ mov64(r16, VM_Version::egpr_test_value());
445 __ mov64(r31, VM_Version::egpr_test_value());
446 __ xorl(rsi, rsi);
447 VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
448 // Generate SEGV
449 __ movl(rax, Address(rsi, 0));
450
451 VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
452 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
453 __ movq(Address(rsi, 0), r16);
454 __ movq(Address(rsi, 8), r31);
455
456 //
457 // Query CPUID 0xD.19 for APX XSAVE offset
458 // Extended State Enumeration Sub-leaf 19 (APX)
459 // EAX = size of APX state (should be 128)
460 // EBX = offset in standard XSAVE format
461 //
462 __ movl(rax, 0xD);
463 __ movl(rcx, 19);
464 __ cpuid();
465 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
466 __ movl(Address(rsi, 0), rax);
467 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
468 __ movl(Address(rsi, 0), rbx);
469
470 UseAPX = save_apx;
471 __ bind(vector_save_restore);
472 //
473 // Check if OS has enabled XGETBV instruction to access XCR0
474 // (OSXSAVE feature flag) and CPU supports AVX
475 //
476 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
477 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
478 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
479 __ cmpl(rcx, 0x18000000);
480 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
481
482 __ movl(rax, 0x6);
483 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
484 __ cmpl(rax, 0x6);
485 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
486
487 // we need to bridge farther than imm8, so we use this island as a thunk
488 __ bind(done);
489 __ jmp(wrapup);
490
491 __ bind(start_simd_check);
492 //
493 // Some OSs have a bug when upper 128/256bits of YMM/ZMM
494 // registers are not restored after a signal processing.
495 // Generate SEGV here (reference through null)
496 // and check upper YMM/ZMM bits after it.
497 //
498 int saved_useavx = UseAVX;
499
500 // If UseAVX is uninitialized or is set by the user to include EVEX
501 if (use_evex) {
502 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
503 // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
504 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
505 __ movl(rax, 0x10000);
506 __ andl(rax, Address(rsi, 4));
507 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
508 __ movl(rbx, 0x80000);
509 __ andl(rbx, Address(rsi, 4));
510 __ orl(rax, rbx);
511 __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
512 // check _cpuid_info.xem_xcr0_eax.bits.opmask
513 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
514 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
515 __ movl(rax, 0xE0);
516 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
517 __ cmpl(rax, 0xE0);
518 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
519
520 if (FLAG_IS_DEFAULT(UseAVX)) {
521 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
522 __ movl(rax, Address(rsi, 0));
523 __ cmpl(rax, 0x50654); // If it is Skylake
524 __ jcc(Assembler::equal, legacy_setup);
525 }
526 // EVEX setup: run in lowest evex mode
527 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
528 UseAVX = 3;
529 #ifdef _WINDOWS
530 // xmm5-xmm15 are not preserved by caller on windows
531 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
532 __ subptr(rsp, 64);
533 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
534 __ subptr(rsp, 64);
535 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
536 __ subptr(rsp, 64);
537 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
538 #endif // _WINDOWS
539
540 // load value into all 64 bytes of zmm7 register
541 __ movl(rcx, VM_Version::ymm_test_value());
542 __ movdl(xmm0, rcx);
543 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
544 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
545 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
546 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
547 VM_Version::clean_cpuFeatures();
548 __ jmp(save_restore_except);
549 }
550
551 __ bind(legacy_setup);
552 // AVX setup
553 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
554 UseAVX = 1;
555 #ifdef _WINDOWS
556 __ subptr(rsp, 32);
557 __ vmovdqu(Address(rsp, 0), xmm7);
558 __ subptr(rsp, 32);
559 __ vmovdqu(Address(rsp, 0), xmm8);
560 __ subptr(rsp, 32);
561 __ vmovdqu(Address(rsp, 0), xmm15);
562 #endif // _WINDOWS
563
564 // load value into all 32 bytes of ymm7 register
565 __ movl(rcx, VM_Version::ymm_test_value());
566
567 __ movdl(xmm0, rcx);
568 __ pshufd(xmm0, xmm0, 0x00);
569 __ vinsertf128_high(xmm0, xmm0);
570 __ vmovdqu(xmm7, xmm0);
571 __ vmovdqu(xmm8, xmm0);
572 __ vmovdqu(xmm15, xmm0);
573 VM_Version::clean_cpuFeatures();
574
575 __ bind(save_restore_except);
576 __ xorl(rsi, rsi);
577 VM_Version::set_cpuinfo_segv_addr(__ pc());
578 // Generate SEGV
579 __ movl(rax, Address(rsi, 0));
580
581 VM_Version::set_cpuinfo_cont_addr(__ pc());
582 // Returns here after signal. Save xmm0 to check it later.
583
584 // If UseAVX is uninitialized or is set by the user to include EVEX
585 if (use_evex) {
586 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
587 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
588 __ movl(rax, 0x10000);
589 __ andl(rax, Address(rsi, 4));
590 __ jcc(Assembler::equal, legacy_save_restore);
591 // check _cpuid_info.xem_xcr0_eax.bits.opmask
592 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
593 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
594 __ movl(rax, 0xE0);
595 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
596 __ cmpl(rax, 0xE0);
597 __ jcc(Assembler::notEqual, legacy_save_restore);
598
599 if (FLAG_IS_DEFAULT(UseAVX)) {
600 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
601 __ movl(rax, Address(rsi, 0));
602 __ cmpl(rax, 0x50654); // If it is Skylake
603 __ jcc(Assembler::equal, legacy_save_restore);
604 }
605 // EVEX check: run in lowest evex mode
606 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
607 UseAVX = 3;
608 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
609 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
610 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
611 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
612 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
613
614 #ifdef _WINDOWS
615 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
616 __ addptr(rsp, 64);
617 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
618 __ addptr(rsp, 64);
619 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
620 __ addptr(rsp, 64);
621 #endif // _WINDOWS
622 generate_vzeroupper(wrapup);
623 VM_Version::clean_cpuFeatures();
624 UseAVX = saved_useavx;
625 __ jmp(wrapup);
626 }
627
628 __ bind(legacy_save_restore);
629 // AVX check
630 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
631 UseAVX = 1;
632 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
633 __ vmovdqu(Address(rsi, 0), xmm0);
634 __ vmovdqu(Address(rsi, 32), xmm7);
635 __ vmovdqu(Address(rsi, 64), xmm8);
636 __ vmovdqu(Address(rsi, 96), xmm15);
637
638 #ifdef _WINDOWS
639 __ vmovdqu(xmm15, Address(rsp, 0));
640 __ addptr(rsp, 32);
641 __ vmovdqu(xmm8, Address(rsp, 0));
642 __ addptr(rsp, 32);
643 __ vmovdqu(xmm7, Address(rsp, 0));
644 __ addptr(rsp, 32);
645 #endif // _WINDOWS
646
647 generate_vzeroupper(wrapup);
648 VM_Version::clean_cpuFeatures();
649 UseAVX = saved_useavx;
650
651 __ bind(wrapup);
652 __ popf();
653 __ pop(rsi);
654 __ pop(rbx);
655 __ pop(rbp);
656 __ ret(0);
657
658 # undef __
659
660 return start;
661 };
662 void generate_vzeroupper(Label& L_wrapup) {
663 # define __ _masm->
664 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
665 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
666 __ jcc(Assembler::notEqual, L_wrapup);
667 __ movl(rcx, 0x0FFF0FF0);
668 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
669 __ andl(rcx, Address(rsi, 0));
670 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
671 __ jcc(Assembler::equal, L_wrapup);
672 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
673 __ jcc(Assembler::equal, L_wrapup);
674 // vzeroupper() will use a pre-computed instruction sequence that we
675 // can't compute until after we've determined CPU capabilities. Use
676 // uncached variant here directly to be able to bootstrap correctly
677 __ vzeroupper_uncached();
678 # undef __
679 }
680 address generate_detect_virt() {
681 StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
682 # define __ _masm->
683
684 address start = __ pc();
685
686 // Evacuate callee-saved registers
687 __ push(rbp);
688 __ push(rbx);
689 __ push(rsi); // for Windows
690
691 __ mov(rax, c_rarg0); // CPUID leaf
692 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
693
694 __ cpuid();
695
696 // Store result to register array
697 __ movl(Address(rsi, 0), rax);
698 __ movl(Address(rsi, 4), rbx);
699 __ movl(Address(rsi, 8), rcx);
700 __ movl(Address(rsi, 12), rdx);
701
702 // Epilogue
703 __ pop(rsi);
704 __ pop(rbx);
705 __ pop(rbp);
706 __ ret(0);
707
708 # undef __
709
710 return start;
711 };
712
713
714 address generate_getCPUIDBrandString(void) {
715 // Flags to test CPU type.
716 const uint32_t HS_EFL_AC = 0x40000;
717 const uint32_t HS_EFL_ID = 0x200000;
718 // Values for when we don't have a CPUID instruction.
719 const int CPU_FAMILY_SHIFT = 8;
720 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
721 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
722
723 Label detect_486, cpu486, detect_586, done, ext_cpuid;
724
725 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
726 # define __ _masm->
727
728 address start = __ pc();
729
730 //
731 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
732 //
733 // rcx and rdx are first and second argument registers on windows
734
735 __ push(rbp);
736 __ mov(rbp, c_rarg0); // cpuid_info address
737 __ push(rbx);
738 __ push(rsi);
739 __ pushf(); // preserve rbx, and flags
740 __ pop(rax);
741 __ push(rax);
742 __ mov(rcx, rax);
743 //
744 // if we are unable to change the AC flag, we have a 386
745 //
746 __ xorl(rax, HS_EFL_AC);
747 __ push(rax);
748 __ popf();
749 __ pushf();
750 __ pop(rax);
751 __ cmpptr(rax, rcx);
752 __ jccb(Assembler::notEqual, detect_486);
753
754 __ movl(rax, CPU_FAMILY_386);
755 __ jmp(done);
756
757 //
758 // If we are unable to change the ID flag, we have a 486 which does
759 // not support the "cpuid" instruction.
760 //
761 __ bind(detect_486);
762 __ mov(rax, rcx);
763 __ xorl(rax, HS_EFL_ID);
764 __ push(rax);
765 __ popf();
766 __ pushf();
767 __ pop(rax);
768 __ cmpptr(rcx, rax);
769 __ jccb(Assembler::notEqual, detect_586);
770
771 __ bind(cpu486);
772 __ movl(rax, CPU_FAMILY_486);
773 __ jmp(done);
774
775 //
776 // At this point, we have a chip which supports the "cpuid" instruction
777 //
778 __ bind(detect_586);
779 __ xorl(rax, rax);
780 __ cpuid();
781 __ orl(rax, rax);
782 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
783 // value of at least 1, we give up and
784 // assume a 486
785
786 //
787 // Extended cpuid(0x80000000) for processor brand string detection
788 //
789 __ bind(ext_cpuid);
790 __ movl(rax, CPUID_EXTENDED_FN);
791 __ cpuid();
792 __ cmpl(rax, CPUID_EXTENDED_FN_4);
793 __ jcc(Assembler::below, done);
794
795 //
796 // Extended cpuid(0x80000002) // first 16 bytes in brand string
797 //
798 __ movl(rax, CPUID_EXTENDED_FN_2);
799 __ cpuid();
800 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
801 __ movl(Address(rsi, 0), rax);
802 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
803 __ movl(Address(rsi, 0), rbx);
804 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
805 __ movl(Address(rsi, 0), rcx);
806 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
807 __ movl(Address(rsi,0), rdx);
808
809 //
810 // Extended cpuid(0x80000003) // next 16 bytes in brand string
811 //
812 __ movl(rax, CPUID_EXTENDED_FN_3);
813 __ cpuid();
814 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
815 __ movl(Address(rsi, 0), rax);
816 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
817 __ movl(Address(rsi, 0), rbx);
818 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
819 __ movl(Address(rsi, 0), rcx);
820 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
821 __ movl(Address(rsi,0), rdx);
822
823 //
824 // Extended cpuid(0x80000004) // last 16 bytes in brand string
825 //
826 __ movl(rax, CPUID_EXTENDED_FN_4);
827 __ cpuid();
828 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
829 __ movl(Address(rsi, 0), rax);
830 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
831 __ movl(Address(rsi, 0), rbx);
832 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
833 __ movl(Address(rsi, 0), rcx);
834 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
835 __ movl(Address(rsi,0), rdx);
836
837 //
838 // return
839 //
840 __ bind(done);
841 __ popf();
842 __ pop(rsi);
843 __ pop(rbx);
844 __ pop(rbp);
845 __ ret(0);
846
847 # undef __
848
849 return start;
850 };
851 };
852
853 void VM_Version::get_processor_features() {
854
855 _cpu = 4; // 486 by default
856 _model = 0;
857 _stepping = 0;
858 _logical_processors_per_package = 1;
859 // i486 internal cache is both I&D and has a 16-byte line size
860 _L1_data_cache_line_size = 16;
861
862 // Get raw processor info
863
864 get_cpu_info_stub(&_cpuid_info);
865
866 assert_is_initialized();
867 _cpu = extended_cpu_family();
868 _model = extended_cpu_model();
869 _stepping = cpu_stepping();
870
871 if (cpu_family() > 4) { // it supports CPUID
872 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
873 _cpu_features = _features; // Preserve features
874 // Logical processors are only available on P4s and above,
875 // and only if hyperthreading is available.
876 _logical_processors_per_package = logical_processor_count();
877 _L1_data_cache_line_size = L1_line_size();
878 }
879
880 // xchg and xadd instructions
881 _supports_atomic_getset4 = true;
882 _supports_atomic_getadd4 = true;
883 _supports_atomic_getset8 = true;
884 _supports_atomic_getadd8 = true;
885
886 // assigning this field effectively enables Unsafe.writebackMemory()
887 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
888 // that is only implemented on x86_64 and only if the OS plays ball
889 if (os::supports_map_sync()) {
890 // publish data cache line flush size to generic field, otherwise
    // let it default to zero thereby disabling writeback
892 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
893 }
894
895 // Check if processor has Intel Ecore
896 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
897 (supports_hybrid() ||
898 _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
899 _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
900 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
901 }
902
903 if (UseSSE < 4) {
904 clear_feature(CPU_SSE4_1);
905 clear_feature(CPU_SSE4_2);
906 }
907
908 if (UseSSE < 3) {
909 clear_feature(CPU_SSE3);
910 clear_feature(CPU_SSSE3);
911 clear_feature(CPU_SSE4A);
912 }
913
914 // ZX cpus specific settings
915 if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
916 if (cpu_family() == 7) {
917 if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
918 UseAVX = 1;
919 } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
920 UseAVX = 0;
921 }
922 } else if (cpu_family() == 6) {
923 UseAVX = 0;
924 }
925 }
926
927 // UseSSE is set to the smaller of what hardware supports and what
928 // the command line requires. i.e., you cannot set UseSSE to 4 on
929 // older systems which do not support it.
930 int use_sse_limit = 2;
931 if (UseSSE > 3 && supports_sse4_1()) {
932 use_sse_limit = 4;
933 } else if (UseSSE > 2 && supports_sse3()) {
934 use_sse_limit = 3;
935 }
936 if (FLAG_IS_DEFAULT(UseSSE)) {
937 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
938 } else if (UseSSE > use_sse_limit) {
939 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
940 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
941 }
942
943 // first try initial setting and detect what we can support
944 int use_avx_limit = 0;
945 if (UseAVX > 0) {
946 if (UseSSE < 4) {
947 // Don't use AVX if SSE is unavailable or has been disabled.
948 use_avx_limit = 0;
949 } else if (UseAVX > 2 && supports_evex()) {
950 use_avx_limit = 3;
951 } else if (UseAVX > 1 && supports_avx2()) {
952 use_avx_limit = 2;
953 } else if (UseAVX > 0 && supports_avx()) {
954 use_avx_limit = 1;
955 } else {
956 use_avx_limit = 0;
957 }
958 }
959 if (FLAG_IS_DEFAULT(UseAVX)) {
960 // Don't use AVX-512 on older Skylakes unless explicitly requested.
961 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
962 FLAG_SET_DEFAULT(UseAVX, 2);
963 } else {
964 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
965 }
966 }
967
968 if (UseAVX > use_avx_limit) {
969 if (UseSSE < 4) {
970 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
971 } else {
972 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
973 }
974 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
975 }
976
977 if (UseAVX < 3) {
978 clear_feature(CPU_AVX512F);
979 clear_feature(CPU_AVX512DQ);
980 clear_feature(CPU_AVX512CD);
981 clear_feature(CPU_AVX512BW);
982 clear_feature(CPU_AVX512ER);
983 clear_feature(CPU_AVX512PF);
984 clear_feature(CPU_AVX512VL);
985 clear_feature(CPU_AVX512_VPOPCNTDQ);
986 clear_feature(CPU_AVX512_VPCLMULQDQ);
987 clear_feature(CPU_AVX512_VAES);
988 clear_feature(CPU_AVX512_VNNI);
989 clear_feature(CPU_AVX512_VBMI);
990 clear_feature(CPU_AVX512_VBMI2);
991 clear_feature(CPU_AVX512_BITALG);
992 clear_feature(CPU_AVX512_IFMA);
993 clear_feature(CPU_APX_F);
994 clear_feature(CPU_AVX512_FP16);
995 clear_feature(CPU_AVX10_1);
996 clear_feature(CPU_AVX10_2);
997 }
998
999
1000 if (UseAVX < 2) {
1001 clear_feature(CPU_AVX2);
1002 clear_feature(CPU_AVX_IFMA);
1003 }
1004
1005 if (UseAVX < 1) {
1006 clear_feature(CPU_AVX);
1007 clear_feature(CPU_VZEROUPPER);
1008 clear_feature(CPU_F16C);
1009 clear_feature(CPU_SHA512);
1010 }
1011
1012 if (logical_processors_per_package() == 1) {
1013 // HT processor could be installed on a system which doesn't support HT.
1014 clear_feature(CPU_HT);
1015 }
1016
1017 if (is_intel()) { // Intel cpus specific settings
1018 if (is_knights_family()) {
1019 clear_feature(CPU_VZEROUPPER);
1020 clear_feature(CPU_AVX512BW);
1021 clear_feature(CPU_AVX512VL);
1022 clear_feature(CPU_APX_F);
1023 clear_feature(CPU_AVX512DQ);
1024 clear_feature(CPU_AVX512_VNNI);
1025 clear_feature(CPU_AVX512_VAES);
1026 clear_feature(CPU_AVX512_VPOPCNTDQ);
1027 clear_feature(CPU_AVX512_VPCLMULQDQ);
1028 clear_feature(CPU_AVX512_VBMI);
1029 clear_feature(CPU_AVX512_VBMI2);
1030 clear_feature(CPU_CLWB);
1031 clear_feature(CPU_FLUSHOPT);
1032 clear_feature(CPU_GFNI);
1033 clear_feature(CPU_AVX512_BITALG);
1034 clear_feature(CPU_AVX512_IFMA);
1035 clear_feature(CPU_AVX_IFMA);
1036 clear_feature(CPU_AVX512_FP16);
1037 clear_feature(CPU_AVX10_1);
1038 clear_feature(CPU_AVX10_2);
1039 }
1040 }
1041
1042 // Currently APX support is only enabled for targets supporting AVX512VL feature.
1043 if (supports_apx_f() && os_supports_apx_egprs() && supports_avx512vl()) {
1044 if (FLAG_IS_DEFAULT(UseAPX)) {
1045 FLAG_SET_DEFAULT(UseAPX, false); // by default UseAPX is false
1046 clear_feature(CPU_APX_F);
1047 } else if (!UseAPX) {
1048 clear_feature(CPU_APX_F);
1049 }
1050 } else {
1051 if (!os_supports_apx_egprs() || !supports_avx512vl()) {
1052 clear_feature(CPU_APX_F);
1053 }
1054 if (UseAPX) {
1055 if (!FLAG_IS_DEFAULT(UseAPX)) {
1056 warning("APX instructions are not available on this CPU");
1057 }
1058 FLAG_SET_DEFAULT(UseAPX, false);
1059 }
1060 }
1061
1062 CHECK_CPU_FEATURE(UseCLMUL, CLMUL, supports_clmul(), "CLMUL" MULTI_INST_WARNING_MSG);
1063 CHECK_CPU_FEATURE(UseAES, AES, supports_aes(), "AES" MULTI_INST_WARNING_MSG);
1064 CHECK_CPU_FEATURE(UseFMA, FMA, supports_fma(), "FMA" MULTI_INST_WARNING_MSG);
1065 CHECK_CPU_FEATURE(UseCountLeadingZerosInstruction, LZCNT, supports_lzcnt(), "lzcnt" SINGLE_INST_WARNING_MSG);
1066 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1067 // VEX prefix is generated only when AVX > 0.
1068 CHECK_CPU_FEATURE(UseBMI1Instructions, BMI1, supports_bmi1(), "BMI1" MULTI_INST_WARNING_MSG);
1069
1070 if (supports_bmi2() && supports_avx()) {
1071 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1072 FLAG_SET_DEFAULT(UseBMI2Instructions, true);
1073 } else if (!UseBMI2Instructions) {
1074 clear_feature(CPU_BMI2);
1075 }
1076 } else {
1077 if (!supports_avx()) {
1078 clear_feature(CPU_BMI2);
1079 }
1080 if (UseBMI2Instructions) {
1081 if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1082 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1083 }
1084 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1085 }
1086 }
1087
1088 CHECK_CPU_FEATURE(UsePopCountInstruction, POPCNT, supports_popcnt(), "popcnt" SINGLE_INST_WARNING_MSG);
1089 CHECK_CPU_FEATURE(UseSHA, SHA, supports_sha() || (supports_avx2() && supports_bmi2()), "SHA" MULTI_INST_WARNING_MSG);
1090
1091 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1092 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1093 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1094 } else {
1095 _has_intel_jcc_erratum = IntelJccErratumMitigation;
1096 }
1097
1098 if (X86ICacheSync == -1) {
1099 // Auto-detect, choosing the best-performing one that still flushes
1100 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1101 if (supports_clwb()) {
1102 FLAG_SET_ERGO(X86ICacheSync, 3);
1103 } else if (supports_clflushopt()) {
1104 FLAG_SET_ERGO(X86ICacheSync, 2);
1105 } else {
1106 FLAG_SET_ERGO(X86ICacheSync, 1);
1107 }
1108 } else {
1109 if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1110 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1111 }
1112 if ((X86ICacheSync == 3) && !supports_clwb()) {
1113 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1114 }
1115 if ((X86ICacheSync == 5) && !supports_serialize()) {
1116 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1117 }
1118 }
1119
1120 stringStream ss(2048);
1121 if (supports_hybrid()) {
1122 ss.print("(hybrid)");
1123 } else {
1124 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1125 }
1126 ss.print(" family %d model %d stepping %d microcode 0x%x",
1127 cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1128 ss.print(", ");
1129 int features_offset = (int)ss.size();
1130 insert_features_names(_features, ss);
1131
1132 _cpu_info_string = ss.as_string(true);
1133 _features_string = _cpu_info_string + features_offset;
1134
1135 // Use AES instructions if available.
1136 if (supports_aes()) {
1137 if (supports_sse3()) {
1138 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1139 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1140 }
1141 } else if (UseAESIntrinsics) {
1142 // The AES intrinsic stubs require AES instruction support (of course)
1143 // but also require sse3 mode or higher for the instructions they use.
1144 if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1145 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1146 }
1147 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1148 }
1149 if (!UseAESIntrinsics) {
1150 if (UseAESCTRIntrinsics) {
1151 if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1152 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1153 }
1154 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1155 }
1156 } else {
1157 if (supports_sse4_1()) {
1158 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1159 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1160 }
1161 } else if (UseAESCTRIntrinsics) {
1162 // The AES-CTR intrinsic stubs require AES instruction support (of course)
1163 // but also require sse4.1 mode or higher for the instructions they use.
1164 if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1165 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1166 }
1167 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1168 }
1169 }
1170 } else {
1171 if (!cpu_supports_aes()) {
1172 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1173 warning("AES intrinsics are not available on this CPU");
1174 }
1175 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1176 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1177 warning("AES-CTR intrinsics are not available on this CPU");
1178 }
1179 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1180 } else if (!UseAES) {
1181 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1182 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1183 }
1184 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1185 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1186 warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1187 }
1188 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1189 }
1190 }
1191
1192 if (UseCLMUL && (UseSSE > 2)) {
1193 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1194 UseCRC32Intrinsics = true;
1195 }
1196 } else if (UseCRC32Intrinsics) {
1197 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1198 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1199 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1200 }
1201
1202 if (supports_avx2()) {
1203 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1204 UseAdler32Intrinsics = true;
1205 }
1206 } else if (UseAdler32Intrinsics) {
1207 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1208 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1209 }
1210 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1211 }
1212
1213 if (supports_sse4_2() && supports_clmul()) {
1214 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1215 UseCRC32CIntrinsics = true;
1216 }
1217 } else if (UseCRC32CIntrinsics) {
1218 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1219 warning("CRC32C intrinsics are not available on this CPU");
1220 }
1221 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1222 }
1223
1224 // GHASH/GCM intrinsics
1225 if (UseCLMUL && (UseSSE > 2)) {
1226 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1227 UseGHASHIntrinsics = true;
1228 }
1229 } else if (UseGHASHIntrinsics) {
1230 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1231 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1232 }
1233 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1234 }
1235
1236 // ChaCha20 Intrinsics
1237 // As long as the system supports AVX as a baseline we can do a
1238 // SIMD-enabled block function. StubGenerator makes the determination
1239 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1240 // version.
1241 if (UseAVX >= 1) {
1242 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1243 UseChaCha20Intrinsics = true;
1244 }
1245 } else if (UseChaCha20Intrinsics) {
1246 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1247 warning("ChaCha20 intrinsic requires AVX instructions");
1248 }
1249 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1250 }
1251
1252 // Kyber Intrinsics
1253 // Currently we only have them for AVX512
1254 if (supports_evex() && supports_avx512bw()) {
1255 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1256 UseKyberIntrinsics = true;
1257 }
1258 } else if (UseKyberIntrinsics) {
1259 if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1260 warning("Intrinsics for ML-KEM are not available on this CPU.");
1261 }
1262 FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1263 }
1264
1265 // Dilithium Intrinsics
1266 if (UseAVX > 1) {
1267 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1268 UseDilithiumIntrinsics = true;
1269 }
1270 } else if (UseDilithiumIntrinsics) {
1271 if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1272 warning("Intrinsics for ML-DSA are not available on this CPU.");
1273 }
1274 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1275 }
1276
1277 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1278 if (UseAVX >= 2) {
1279 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1280 UseBASE64Intrinsics = true;
1281 }
1282 } else if (UseBASE64Intrinsics) {
1283 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1284 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1285 }
1286 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1287 }
1288
1289 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1290 UseMD5Intrinsics = true;
1291 }
1292
1293 if (supports_sha() && supports_sse4_1() && UseSHA) {
1294 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1295 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1296 }
1297 } else if (UseSHA1Intrinsics) {
1298 if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1299 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1300 }
1301 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1302 }
1303
1304 if (supports_sse4_1() && UseSHA) {
1305 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1306 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1307 }
1308 } else if (UseSHA256Intrinsics) {
1309 if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1310 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1311 }
1312 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1313 }
1314
1315 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1316 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1317 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1318 }
1319 } else if (UseSHA512Intrinsics) {
1320 if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1321 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1322 }
1323 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1324 }
1325
1326 if (UseSHA && ((supports_evex() && supports_avx512vlbw()) ||
1327 (EnableX86ECoreOpts && !supports_hybrid()))) {
1328 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1329 FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1330 }
1331 } else if (UseSHA3Intrinsics) {
1332 if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1333 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1334 }
1335 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1336 }
1337
1338 #ifdef COMPILER2
1339 int max_vector_size = 0;
1340 if (UseAVX == 0 || !os_supports_avx_vectors()) {
1341 // 16 byte vectors (in XMM) are supported with SSE2+
1342 max_vector_size = 16;
1343 } else if (UseAVX == 1 || UseAVX == 2) {
1344 // 32 bytes vectors (in YMM) are only supported with AVX+
1345 max_vector_size = 32;
1346 } else if (UseAVX > 2) {
1347 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1348 max_vector_size = 64;
1349 }
1350
1351 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1352
1353 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1354 if (MaxVectorSize < min_vector_size) {
1355 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1356 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1357 }
1358 if (MaxVectorSize > max_vector_size) {
1359 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1360 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1361 }
1362 if (!is_power_of_2(MaxVectorSize)) {
1363 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1364 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1365 }
1366 } else {
1367 // If default, use highest supported configuration
1368 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1369 }
1370
1371 #ifdef ASSERT
1372 if (MaxVectorSize > 0) {
1373 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1374 tty->print_cr("State of YMM registers after signal handle:");
1375 int nreg = 4;
1376 const char* ymm_name[4] = {"0", "7", "8", "15"};
1377 for (int i = 0; i < nreg; i++) {
1378 tty->print("YMM%s:", ymm_name[i]);
1379 for (int j = 7; j >=0; j--) {
1380 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1381 }
1382 tty->cr();
1383 }
1384 }
1385 }
1386 #endif // ASSERT
1387
1388 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1389 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1390 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1391 }
1392 } else if (UsePoly1305Intrinsics) {
1393 if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1394 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1395 }
1396 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1397 }
1398
1399 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1400 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1401 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1402 }
1403 } else if (UseIntPolyIntrinsics) {
1404 if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1405 warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1406 }
1407 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1408 }
1409
1410 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1411 UseMultiplyToLenIntrinsic = true;
1412 }
1413 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1414 UseSquareToLenIntrinsic = true;
1415 }
1416 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1417 UseMulAddIntrinsic = true;
1418 }
1419 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1420 UseMontgomeryMultiplyIntrinsic = true;
1421 }
1422 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1423 UseMontgomerySquareIntrinsic = true;
1424 }
1425 #endif // COMPILER2
1426
1427 // On new cpus instructions which update whole XMM register should be used
1428 // to prevent partial register stall due to dependencies on high half.
1429 //
1430 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1431 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1432 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1433 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1434
1435
1436 if (is_zx()) { // ZX cpus specific settings
1437 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1438 UseStoreImmI16 = false; // don't use it on ZX cpus
1439 }
1440 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1441 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1442 // Use it on all ZX cpus
1443 UseAddressNop = true;
1444 }
1445 }
1446 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1447 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1448 }
1449 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1450 if (supports_sse3()) {
1451 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1452 } else {
1453 UseXmmRegToRegMoveAll = false;
1454 }
1455 }
1456 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1457 #ifdef COMPILER2
1458 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1459 // For new ZX cpus do the next optimization:
1460 // don't align the beginning of a loop if there are enough instructions
1461 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1462 // in current fetch line (OptoLoopAlignment) or the padding
1463 // is big (> MaxLoopPad).
1464 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1465 // generated NOP instructions. 11 is the largest size of one
1466 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1467 MaxLoopPad = 11;
1468 }
1469 #endif // COMPILER2
1470 if (supports_sse4_2()) { // new ZX cpus
1471 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1472 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1473 }
1474 }
1475 }
1476
1477 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1478 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1479 }
1480 }
1481
1482 if (is_amd_family()) { // AMD cpus specific settings
1483 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1484 // Use it on new AMD cpus starting from Opteron.
1485 UseAddressNop = true;
1486 }
1487 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1488 if (supports_sse4a()) {
1489 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1490 } else {
1491 UseXmmLoadAndClearUpper = false;
1492 }
1493 }
1494 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1495 if (supports_sse4a()) {
1496 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1497 } else {
1498 UseXmmRegToRegMoveAll = false;
1499 }
1500 }
1501 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1502 if (supports_sse4a()) {
1503 UseXmmI2F = true;
1504 } else {
1505 UseXmmI2F = false;
1506 }
1507 }
1508 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1509 if (supports_sse4a()) {
1510 UseXmmI2D = true;
1511 } else {
1512 UseXmmI2D = false;
1513 }
1514 }
1515
1516 // some defaults for AMD family 15h
1517 if (cpu_family() == 0x15) {
1518 // On family 15h processors default is no sw prefetch
1519 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1520 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1521 }
1522 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1523 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1524 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1525 }
1526 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1527 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1528 }
1529 }
1530
1531 #ifdef COMPILER2
1532 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1533 // Limit vector size to 16 bytes on AMD cpus < 17h.
1534 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1535 }
1536 #endif // COMPILER2
1537
1538 // Some defaults for AMD family >= 17h && Hygon family 18h
1539 if (cpu_family() >= 0x17) {
1540 // On family >=17h processors use XMM and UnalignedLoadStores
1541 // for Array Copy
1542 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1543 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1544 }
1545 #ifdef COMPILER2
1546 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1547 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1548 }
1549 #endif
1550 }
1551 }
1552
1553 if (is_intel()) { // Intel cpus specific settings
1554 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1555 UseStoreImmI16 = false; // don't use it on Intel cpus
1556 }
1557 if (is_intel_server_family() || cpu_family() == 15) {
1558 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1559 // Use it on all Intel cpus starting from PentiumPro
1560 UseAddressNop = true;
1561 }
1562 }
1563 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1564 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1565 }
1566 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1567 if (supports_sse3()) {
1568 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1569 } else {
1570 UseXmmRegToRegMoveAll = false;
1571 }
1572 }
1573 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1574 #ifdef COMPILER2
1575 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1576 // For new Intel cpus do the next optimization:
1577 // don't align the beginning of a loop if there are enough instructions
1578 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1579 // in current fetch line (OptoLoopAlignment) or the padding
1580 // is big (> MaxLoopPad).
1581 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1582 // generated NOP instructions. 11 is the largest size of one
1583 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1584 MaxLoopPad = 11;
1585 }
1586 #endif // COMPILER2
1587
1588 if (is_intel_modern_cpu()) { // Newest Intel cpus
1589 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1590 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1591 }
1592 }
1593 }
1594 if (is_atom_family() || is_knights_family()) {
1595 #ifdef COMPILER2
1596 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1597 OptoScheduling = true;
1598 }
1599 #endif
1600 if (supports_sse4_2()) { // Silvermont
1601 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1602 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1603 }
1604 }
1605 if (FLAG_IS_DEFAULT(UseIncDec)) {
1606 FLAG_SET_DEFAULT(UseIncDec, false);
1607 }
1608 }
1609 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1610 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1611 }
1612 }
1613
1614 #ifdef COMPILER2
1615 if (UseAVX > 2) {
1616 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1617 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1618 ArrayOperationPartialInlineSize != 0 &&
1619 ArrayOperationPartialInlineSize != 16 &&
1620 ArrayOperationPartialInlineSize != 32 &&
1621 ArrayOperationPartialInlineSize != 64)) {
1622 int inline_size = 0;
1623 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1624 inline_size = 64;
1625 } else if (MaxVectorSize >= 32) {
1626 inline_size = 32;
1627 } else if (MaxVectorSize >= 16) {
1628 inline_size = 16;
1629 }
1630 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1631 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1632 }
1633 ArrayOperationPartialInlineSize = inline_size;
1634 }
1635
1636 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1637 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1638 if (ArrayOperationPartialInlineSize) {
1639 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1640 } else {
1641 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1642 }
1643 }
1644 }
1645
1646 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1647 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1648 OptimizeFill = false;
1649 }
1650 }
1651 #endif
1652 if (supports_sse4_2()) {
1653 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1654 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1655 }
1656 } else if (UseSSE42Intrinsics) {
1657 if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1658 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1659 }
1660 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1661 }
1662 if (UseSSE42Intrinsics) {
1663 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1664 UseVectorizedMismatchIntrinsic = true;
1665 }
1666 } else if (UseVectorizedMismatchIntrinsic) {
1667 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1668 warning("vectorizedMismatch intrinsics are not available on this CPU");
1669 }
1670 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1671 }
1672 if (UseAVX >= 2) {
1673 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1674 } else if (UseVectorizedHashCodeIntrinsic) {
1675 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1676 warning("vectorizedHashCode intrinsics are not available on this CPU");
1677 }
1678 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1679 }
1680
1681 // Use count trailing zeros instruction if available
1682 if (supports_bmi1()) {
1683 // tzcnt does not require VEX prefix
1684 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1685 UseCountTrailingZerosInstruction = true;
1686 }
1687 } else if (UseCountTrailingZerosInstruction) {
1688 if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1689 warning("tzcnt instruction is not available on this CPU");
1690 }
1691 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1692 }
1693
1694 // Use fast-string operations if available.
1695 if (supports_erms()) {
1696 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1697 UseFastStosb = true;
1698 }
1699 } else if (UseFastStosb) {
1700 if (!FLAG_IS_DEFAULT(UseFastStosb)) {
1701 warning("fast-string operations are not available on this CPU");
1702 }
1703 FLAG_SET_DEFAULT(UseFastStosb, false);
1704 }
1705
1706 // For AMD Processors use XMM/YMM MOVDQU instructions
1707 // for Object Initialization as default
1708 if (is_amd() && cpu_family() >= 0x19) {
1709 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1710 UseFastStosb = false;
1711 }
1712 }
1713
1714 #ifdef COMPILER2
1715 if (is_intel() && MaxVectorSize > 16) {
1716 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1717 UseFastStosb = false;
1718 }
1719 }
1720 #endif
1721
1722 // Use XMM/YMM MOVDQU instruction for Object Initialization
1723 if (!UseFastStosb && UseUnalignedLoadStores) {
1724 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1725 UseXMMForObjInit = true;
1726 }
1727 } else if (UseXMMForObjInit) {
1728 if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1729 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1730 }
1731 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1732 }
1733
1734 #ifdef COMPILER2
1735 if (FLAG_IS_DEFAULT(AlignVector)) {
1736 // Modern processors allow misaligned memory operations for vectors.
1737 AlignVector = !UseUnalignedLoadStores;
1738 }
1739 #endif // COMPILER2
1740
1741 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1742 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1743 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1744 }
1745 }
1746
1747 // Allocation prefetch settings
1748 int cache_line_size = checked_cast<int>(prefetch_data_size());
1749 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1750 (cache_line_size > AllocatePrefetchStepSize)) {
1751 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1752 }
1753
1754 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1755 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1756 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1757 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1758 }
1759 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1760 }
1761
1762 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1763 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1764 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1765 }
1766
1767 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1768 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1769 is_intel_modern_cpu()) { // Nehalem based cpus
1770 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1771 }
1772 #ifdef COMPILER2
1773 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1774 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1775 }
1776 #endif
1777 }
1778
1779 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1780 #ifdef COMPILER2
1781 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1782 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1783 }
1784 #endif
1785 }
1786
1787 // Prefetch settings
1788
1789 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1790 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1791 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1792 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1793
1794 // gc copy/scan is disabled if prefetchw isn't supported, because
1795 // Prefetch::write emits an inlined prefetchw on Linux.
1796 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1797 // The used prefetcht0 instruction works for both amd64 and em64t.
1798
1799 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1800 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1801 }
1802 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1803 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1804 }
1805
1806 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1807 (cache_line_size > ContendedPaddingWidth))
1808 ContendedPaddingWidth = cache_line_size;
1809
1810 // This machine allows unaligned memory accesses
1811 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1812 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1813 }
1814
1815 #ifndef PRODUCT
1816 if (log_is_enabled(Info, os, cpu)) {
1817 LogStream ls(Log(os, cpu)::info());
1818 outputStream* log = &ls;
1819 log->print_cr("Logical CPUs per core: %u",
1820 logical_processors_per_package());
1821 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1822 log->print("UseSSE=%d", UseSSE);
1823 if (UseAVX > 0) {
1824 log->print(" UseAVX=%d", UseAVX);
1825 }
1826 if (UseAES) {
1827 log->print(" UseAES=1");
1828 }
1829 #ifdef COMPILER2
1830 if (MaxVectorSize > 0) {
1831 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1832 }
1833 #endif
1834 log->cr();
1835 log->print("Allocation");
1836 if (AllocatePrefetchStyle <= 0) {
1837 log->print_cr(": no prefetching");
1838 } else {
1839 log->print(" prefetching: ");
1840 if (AllocatePrefetchInstr == 0) {
1841 log->print("PREFETCHNTA");
1842 } else if (AllocatePrefetchInstr == 1) {
1843 log->print("PREFETCHT0");
1844 } else if (AllocatePrefetchInstr == 2) {
1845 log->print("PREFETCHT2");
1846 } else if (AllocatePrefetchInstr == 3) {
1847 log->print("PREFETCHW");
1848 }
1849 if (AllocatePrefetchLines > 1) {
1850 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1851 } else {
1852 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1853 }
1854 }
1855
1856 if (PrefetchCopyIntervalInBytes > 0) {
1857 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1858 }
1859 if (PrefetchScanIntervalInBytes > 0) {
1860 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1861 }
1862 if (ContendedPaddingWidth > 0) {
1863 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1864 }
1865 }
1866 #endif // !PRODUCT
1867 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1868 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1869 }
1870 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1871 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1872 }
1873 // CopyAVX3Threshold is the threshold at which 64-byte vector instructions
1874 // are used for implementing the array copy, fill and clear operations.
1875 // The Intel platforms that support the serialize instruction and the AMD
1876 // platforms with native 512-bit datapath have improved implementation of
1877 // 64-byte load/stores and so the default threshold is set to 0 for these
1878 // platforms.
1879 if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
1880 if (is_intel() && is_intel_server_family() && supports_serialize()) {
1881 FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1882 } else if (is_amd() && is_amd_avx512_datapath_server_family()) {
1883 FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1884 } else {
1885 FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
1886 }
1887 }
1888 }
1889
1890 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1891 VirtualizationType vrt = VM_Version::get_detected_virtualization();
1892 if (vrt == XenHVM) {
1893 st->print_cr("Xen hardware-assisted virtualization detected");
1894 } else if (vrt == KVM) {
1895 st->print_cr("KVM virtualization detected");
1896 } else if (vrt == VMWare) {
1897 st->print_cr("VMWare virtualization detected");
1898 VirtualizationSupport::print_virtualization_info(st);
1899 } else if (vrt == HyperV) {
1900 st->print_cr("Hyper-V virtualization detected");
1901 } else if (vrt == HyperVRole) {
1902 st->print_cr("Hyper-V role detected");
1903 }
1904 }
1905
1906 bool VM_Version::compute_has_intel_jcc_erratum() {
1907 if (!is_intel_family_core()) {
1908 // Only Intel CPUs are affected.
1909 return false;
1910 }
1911 // The following table of affected CPUs is based on the following document released by Intel:
1912 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1913 switch (_model) {
1914 case 0x8E:
1915 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1916 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1917 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1918 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1919 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1920 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1921 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1922 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1923 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1924 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1925 case 0x4E:
1926 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1927 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1928 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1929 return _stepping == 0x3;
1930 case 0x55:
1931 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1932 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1933 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1934 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1935 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1936 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1937 return _stepping == 0x4 || _stepping == 0x7;
1938 case 0x5E:
1939 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1940 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1941 return _stepping == 0x3;
1942 case 0x9E:
1943 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1944 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1945 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1946 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1947 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1948 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1949 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1950 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1951 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1952 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1953 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1954 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1955 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1956 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1957 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1958 case 0xA5:
1959 // Not in Intel documentation.
1960 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1961 return true;
1962 case 0xA6:
1963 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1964 return _stepping == 0x0;
1965 case 0xAE:
1966 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1967 return _stepping == 0xA;
1968 default:
1969 // If we are running on another intel machine not recognized in the table, we are okay.
1970 return false;
1971 }
1972 }
1973
1974 // On Xen, the cpuid instruction returns
1975 // eax / registers[0]: Version of Xen
1976 // ebx / registers[1]: chars 'XenV'
1977 // ecx / registers[2]: chars 'MMXe'
1978 // edx / registers[3]: chars 'nVMM'
1979 //
1980 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
1981 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
1982 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
1983 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
1984 //
1985 // more information :
1986 // https://kb.vmware.com/s/article/1009458
1987 //
1988 void VM_Version::check_virtualizations() {
1989 uint32_t registers[4] = {0};
1990 char signature[13] = {0};
1991
1992 // Xen cpuid leaves can be found 0x100 aligned boundary starting
1993 // from 0x40000000 until 0x40010000.
1994 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
1995 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
1996 detect_virt_stub(leaf, registers);
1997 memcpy(signature, ®isters[1], 12);
1998
1999 if (strncmp("VMwareVMware", signature, 12) == 0) {
2000 Abstract_VM_Version::_detected_virtualization = VMWare;
2001 // check for extended metrics from guestlib
2002 VirtualizationSupport::initialize();
2003 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2004 Abstract_VM_Version::_detected_virtualization = HyperV;
2005 #ifdef _WINDOWS
2006 // CPUID leaf 0x40000007 is available to the root partition only.
2007 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2008 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2009 detect_virt_stub(0x40000007, registers);
2010 if ((registers[0] != 0x0) ||
2011 (registers[1] != 0x0) ||
2012 (registers[2] != 0x0) ||
2013 (registers[3] != 0x0)) {
2014 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2015 }
2016 #endif
2017 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2018 Abstract_VM_Version::_detected_virtualization = KVM;
2019 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2020 Abstract_VM_Version::_detected_virtualization = XenHVM;
2021 }
2022 }
2023 }
2024
2025 #ifdef COMPILER2
2026 // Determine if it's running on Cascade Lake using default options.
2027 bool VM_Version::is_default_intel_cascade_lake() {
2028 return FLAG_IS_DEFAULT(UseAVX) &&
2029 FLAG_IS_DEFAULT(MaxVectorSize) &&
2030 UseAVX > 2 &&
2031 is_intel_cascade_lake();
2032 }
2033 #endif
2034
2035 bool VM_Version::is_intel_cascade_lake() {
2036 return is_intel_skylake() && _stepping >= 5;
2037 }
2038
2039 bool VM_Version::is_intel_darkmont() {
2040 return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2041 }
2042
// Thin wrapper that invokes clear_apx_test_state_stub, the function pointer
// that VM_Version::initialize() sets up from generated stub code.
// NOTE(review): the stub pointer is not checked here, so this presumably must
// not be called before initialize() has run - confirm against callers.
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2046
// Set to true at the end of VM_Version::initialize(); checked by the assert in
// initialize_cpu_information().
static bool _vm_version_initialized = false;

// Generates the VM_Version stubs, queries the processor features and, when a
// hypervisor is reported, detects the virtualization type.
// Exits the VM during initialization if the stub blob cannot be allocated.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate each stub into the blob and keep a typed function pointer to it.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                     g.generate_getCPUIDBrandString());
  // Runs after the stubs above have been generated.
  get_processor_features();

  Assembler::precompute_instructions();

  // check_virtualizations() calls detect_virt_stub, generated above.
  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2077
// CPU family identifiers. In this file they are compared against
// extended_cpu_family() / cpu_family() and the values match the indices of
// the corresponding names in _family_id_intel.
typedef enum {
   CPU_FAMILY_8086_8088  = 0,
   CPU_FAMILY_INTEL_286  = 2,
   CPU_FAMILY_INTEL_386  = 3,
   CPU_FAMILY_INTEL_486  = 4,
   CPU_FAMILY_PENTIUM    = 5,
   CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
   CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
2087
// Single-bit masks. INTEL64_FLAG is tested against
// _cpuid_info.ext_cpuid1_edx.value in cpu_is_em64t().
// NOTE(review): RDTSCP_FLAG is presumably a mask for the same register - confirm.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2092
// Single-bit feature masks. HTT_FLAG and SEP_FLAG are compared against the bit
// being tested in _cpuid_info.std_cpuid1_edx.value by
// cpu_write_support_string(); the bit position of each mask matches the index
// of its description in _feature_edx_id. Bits 10 and 20 have no mask here.
typedef enum {
   FPU_FLAG     = 0x00000001,
   VME_FLAG     = 0x00000002,
   DE_FLAG      = 0x00000004,
   PSE_FLAG     = 0x00000008,
   TSC_FLAG     = 0x00000010,
   MSR_FLAG     = 0x00000020,
   PAE_FLAG     = 0x00000040,
   MCE_FLAG     = 0x00000080,
   CX8_FLAG     = 0x00000100,
   APIC_FLAG    = 0x00000200,
   SEP_FLAG     = 0x00000800,
   MTRR_FLAG    = 0x00001000,
   PGE_FLAG     = 0x00002000,
   MCA_FLAG     = 0x00004000,
   CMOV_FLAG    = 0x00008000,
   PAT_FLAG     = 0x00010000,
   PSE36_FLAG   = 0x00020000,
   PSNUM_FLAG   = 0x00040000,
   CLFLUSH_FLAG = 0x00080000,
   DTS_FLAG     = 0x00200000,
   ACPI_FLAG    = 0x00400000,
   MMX_FLAG     = 0x00800000,
   FXSR_FLAG    = 0x01000000,
   SSE_FLAG     = 0x02000000,
   SSE2_FLAG    = 0x04000000,
   SS_FLAG      = 0x08000000,
   HTT_FLAG     = 0x10000000,
   TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2123
// VM_Version statics

// Number of entries in the _family_id_intel and _family_id_amd name tables.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

// Size of the vendor id buffer: 12 characters plus the terminating '\0'
// (see cpu_detailed_description()).
const size_t VENDOR_LENGTH = 13;
// Size of the brand string buffer: cpu_extended_brand_string() stores twelve
// 32-bit words (48 bytes); the extra byte is presumably for a terminating '\0'.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Lazily allocated brand string, owned by cpu_brand_string().
static char* _cpu_brand_string = nullptr;
// Cached result of maximum_qualified_cpu_frequency(); 0 means "not computed yet".
static int64_t _max_qualified_cpu_frequency = 0;

// Both are filled in by resolve_cpu_information_details().
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2137
// Intel family names, indexed by the family id returned by
// extended_cpu_family() (see cpu_family_description()). Entries for families
// without a name are empty strings.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2156
// AMD family names, indexed by the family id returned by
// extended_cpu_family() (see cpu_family_description()). Entries for families
// without a name are empty strings; the last entry (index 0x17) is "Zen".
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
//
// Model names for CPU family 6 (CPU_FAMILY_PENTIUMPRO), indexed by the model
// number returned by extended_cpu_model(). Models without a name are empty
// strings. The table ends with a nullptr sentinel, which
// cpu_model_description() relies on to stop its walk for larger model numbers.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2259
/* Brand ID is for backward compatibility.
 * Newer CPUs use the extended brand string.
 *
 * Indexed by the low 8 bits of _cpuid_info.std_cpuid1_ebx.value (see
 * cpu_brand()). The table ends with a nullptr sentinel, which cpu_brand()
 * relies on to stop its walk for larger brand numbers. */
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2274
2275
// Descriptions of the feature bits in _cpuid_info.std_cpuid1_edx.value,
// indexed by bit position (entry 0 describes bit 0). Empty strings mark bits
// that cpu_write_support_string() never reports. That function only walks
// bits 0..29, so the last two entries are currently not printed.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2310
// Descriptions of the feature bits in _cpuid_info.ext_cpuid1_edx.value,
// indexed by bit position (entry 0 describes bit 0). Empty strings mark bits
// that cpu_write_support_string() never reports.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2345
// Descriptions of the feature bits in _cpuid_info.std_cpuid1_ecx.value,
// indexed by bit position (entry 0 describes bit 0). Empty strings mark bits
// that cpu_write_support_string() never reports. That function only walks
// bits 0..29, so the last two entries are currently not printed.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2380
// Descriptions of the feature bits in _cpuid_info.ext_cpuid1_ecx.value,
// indexed by bit position (entry 0 describes bit 0). Empty strings mark bits
// that cpu_write_support_string() never reports.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2415
2416 const char* VM_Version::cpu_model_description(void) {
2417 uint32_t cpu_family = extended_cpu_family();
2418 uint32_t cpu_model = extended_cpu_model();
2419 const char* model = nullptr;
2420
2421 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2422 for (uint32_t i = 0; i <= cpu_model; i++) {
2423 model = _model_id_pentium_pro[i];
2424 if (model == nullptr) {
2425 break;
2426 }
2427 }
2428 }
2429 return model;
2430 }
2431
2432 const char* VM_Version::cpu_brand_string(void) {
2433 if (_cpu_brand_string == nullptr) {
2434 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2435 if (nullptr == _cpu_brand_string) {
2436 return nullptr;
2437 }
2438 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2439 if (ret_val != OS_OK) {
2440 FREE_C_HEAP_ARRAY(_cpu_brand_string);
2441 _cpu_brand_string = nullptr;
2442 }
2443 }
2444 return _cpu_brand_string;
2445 }
2446
2447 const char* VM_Version::cpu_brand(void) {
2448 const char* brand = nullptr;
2449
2450 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2451 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2452 brand = _brand_id[0];
2453 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2454 brand = _brand_id[i];
2455 }
2456 }
2457 return brand;
2458 }
2459
2460 bool VM_Version::cpu_is_em64t(void) {
2461 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2462 }
2463
2464 bool VM_Version::is_netburst(void) {
2465 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2466 }
2467
2468 bool VM_Version::supports_tscinv_ext(void) {
2469 if (!supports_tscinv_bit()) {
2470 return false;
2471 }
2472
2473 if (is_intel()) {
2474 return true;
2475 }
2476
2477 if (is_amd()) {
2478 return !is_amd_Barcelona();
2479 }
2480
2481 if (is_hygon()) {
2482 return true;
2483 }
2484
2485 return false;
2486 }
2487
2488 void VM_Version::resolve_cpu_information_details(void) {
2489
2490 // in future we want to base this information on proper cpu
2491 // and cache topology enumeration such as:
2492 // Intel 64 Architecture Processor Topology Enumeration
2493 // which supports system cpu and cache topology enumeration
2494 // either using 2xAPICIDs or initial APICIDs
2495
2496 // currently only rough cpu information estimates
2497 // which will not necessarily reflect the exact configuration of the system
2498
2499 // this is the number of logical hardware threads
2500 // visible to the operating system
2501 _no_of_threads = os::processor_count();
2502
2503 // find out number of threads per cpu package
2504 int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2505 if (threads_per_package == 0) {
2506 // Fallback code to avoid div by zero in subsequent code.
2507 // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2508 threads_per_package = threads_per_core() * cores_per_cpu();
2509 }
2510
2511 // use amount of threads visible to the process in order to guess number of sockets
2512 _no_of_sockets = _no_of_threads / threads_per_package;
2513
2514 // process might only see a subset of the total number of threads
2515 // from a single processor package. Virtualization/resource management for example.
2516 // If so then just write a hard 1 as num of pkgs.
2517 if (0 == _no_of_sockets) {
2518 _no_of_sockets = 1;
2519 }
2520
2521 // estimate the number of cores
2522 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2523 }
2524
2525
2526 const char* VM_Version::cpu_family_description(void) {
2527 int cpu_family_id = extended_cpu_family();
2528 if (is_amd()) {
2529 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2530 return _family_id_amd[cpu_family_id];
2531 }
2532 }
2533 if (is_intel()) {
2534 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2535 return cpu_model_description();
2536 }
2537 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2538 return _family_id_intel[cpu_family_id];
2539 }
2540 }
2541 if (is_zx()) {
2542 int cpu_model_id = extended_cpu_model();
2543 if (cpu_family_id == 7) {
2544 switch (cpu_model_id) {
2545 case 0x1B:
2546 return "wudaokou";
2547 case 0x3B:
2548 return "lujiazui";
2549 case 0x5B:
2550 return "yongfeng";
2551 case 0x6B:
2552 return "shijidadao";
2553 }
2554 } else if (cpu_family_id == 6) {
2555 return "zhangjiang";
2556 }
2557 }
2558 if (is_hygon()) {
2559 return "Dhyana";
2560 }
2561 return "Unknown x86";
2562 }
2563
2564 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2565 assert(buf != nullptr, "buffer is null!");
2566 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2567
2568 const char* cpu_type = nullptr;
2569 const char* x64 = nullptr;
2570
2571 if (is_intel()) {
2572 cpu_type = "Intel";
2573 x64 = cpu_is_em64t() ? " Intel64" : "";
2574 } else if (is_amd()) {
2575 cpu_type = "AMD";
2576 x64 = cpu_is_em64t() ? " AMD64" : "";
2577 } else if (is_zx()) {
2578 cpu_type = "Zhaoxin";
2579 x64 = cpu_is_em64t() ? " x86_64" : "";
2580 } else if (is_hygon()) {
2581 cpu_type = "Hygon";
2582 x64 = cpu_is_em64t() ? " AMD64" : "";
2583 } else {
2584 cpu_type = "Unknown x86";
2585 x64 = cpu_is_em64t() ? " x86_64" : "";
2586 }
2587
2588 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2589 cpu_type,
2590 cpu_family_description(),
2591 supports_ht() ? " (HT)" : "",
2592 supports_sse3() ? " SSE3" : "",
2593 supports_ssse3() ? " SSSE3" : "",
2594 supports_sse4_1() ? " SSE4.1" : "",
2595 supports_sse4_2() ? " SSE4.2" : "",
2596 supports_sse4a() ? " SSE4A" : "",
2597 is_netburst() ? " Netburst" : "",
2598 is_intel_family_core() ? " Core" : "",
2599 x64);
2600
2601 return OS_OK;
2602 }
2603
2604 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2605 assert(buf != nullptr, "buffer is null!");
2606 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2607 assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2608
2609 // invoke newly generated asm code to fetch CPU Brand String
2610 getCPUIDBrandString_stub(&_cpuid_info);
2611
2612 // fetch results into buffer
2613 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0;
2614 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1;
2615 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2;
2616 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2617 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2618 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2619 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2620 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2621 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2622 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2623 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2624 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2625
2626 return OS_OK;
2627 }
2628
2629 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2630 guarantee(buf != nullptr, "buffer is null!");
2631 guarantee(buf_len > 0, "buffer len not enough!");
2632
2633 unsigned int flag = 0;
2634 unsigned int fi = 0;
2635 size_t written = 0;
2636 const char* prefix = "";
2637
2638 #define WRITE_TO_BUF(string) \
2639 { \
2640 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2641 if (res < 0) { \
2642 return buf_len - 1; \
2643 } \
2644 written += res; \
2645 if (prefix[0] == '\0') { \
2646 prefix = ", "; \
2647 } \
2648 }
2649
2650 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2651 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2652 continue; /* no hyperthreading */
2653 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2654 continue; /* no fast system call */
2655 }
2656 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2657 WRITE_TO_BUF(_feature_edx_id[fi]);
2658 }
2659 }
2660
2661 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2662 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2663 WRITE_TO_BUF(_feature_ecx_id[fi]);
2664 }
2665 }
2666
2667 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2668 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2669 WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2670 }
2671 }
2672
2673 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2674 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2675 WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2676 }
2677 }
2678
2679 if (supports_tscinv_bit()) {
2680 WRITE_TO_BUF("Invariant TSC");
2681 }
2682
2683 if (supports_hybrid()) {
2684 WRITE_TO_BUF("Hybrid Architecture");
2685 }
2686
2687 return written;
2688 }
2689
2690 /**
2691 * Write a detailed description of the cpu to a given buffer, including
2692 * feature set.
2693 */
2694 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2695 assert(buf != nullptr, "buffer is null!");
2696 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2697
2698 static const char* unknown = "<unknown>";
2699 char vendor_id[VENDOR_LENGTH];
2700 const char* family = nullptr;
2701 const char* model = nullptr;
2702 const char* brand = nullptr;
2703 int outputLen = 0;
2704
2705 family = cpu_family_description();
2706 if (family == nullptr) {
2707 family = unknown;
2708 }
2709
2710 model = cpu_model_description();
2711 if (model == nullptr) {
2712 model = unknown;
2713 }
2714
2715 brand = cpu_brand_string();
2716
2717 if (brand == nullptr) {
2718 brand = cpu_brand();
2719 if (brand == nullptr) {
2720 brand = unknown;
2721 }
2722 }
2723
2724 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2725 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2726 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2727 vendor_id[VENDOR_LENGTH-1] = '\0';
2728
2729 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2730 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2731 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2732 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2733 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2734 "Supports: ",
2735 brand,
2736 vendor_id,
2737 family,
2738 extended_cpu_family(),
2739 model,
2740 extended_cpu_model(),
2741 cpu_stepping(),
2742 _cpuid_info.std_cpuid1_eax.bits.ext_family,
2743 _cpuid_info.std_cpuid1_eax.bits.ext_model,
2744 _cpuid_info.std_cpuid1_eax.bits.proc_type,
2745 _cpuid_info.std_cpuid1_eax.value,
2746 _cpuid_info.std_cpuid1_ebx.value,
2747 _cpuid_info.std_cpuid1_ecx.value,
2748 _cpuid_info.std_cpuid1_edx.value,
2749 _cpuid_info.ext_cpuid1_eax,
2750 _cpuid_info.ext_cpuid1_ebx,
2751 _cpuid_info.ext_cpuid1_ecx,
2752 _cpuid_info.ext_cpuid1_edx);
2753
2754 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2755 if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2756 return OS_ERR;
2757 }
2758
2759 cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2760
2761 return OS_OK;
2762 }
2763
2764
// Fill in Abstract_VM_Version statics
//
// Resolves the thread/socket/core estimates and fills _cpu_name and _cpu_desc.
// Must run after VM_Version::initialize() and at most once; both conditions
// are only checked by the asserts below.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  // (the return values of both calls are ignored)
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2776
2777 /**
2778 * For information about extracting the frequency from the cpu brand string, please see:
2779 *
2780 * Intel Processor Identification and the CPUID Instruction
2781 * Application Note 485
2782 * May 2012
2783 *
2784 * The return value is the frequency in Hz.
2785 */
2786 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2787 const char* const brand_string = cpu_brand_string();
2788 if (brand_string == nullptr) {
2789 return 0;
2790 }
2791 const int64_t MEGA = 1000000;
2792 int64_t multiplier = 0;
2793 int64_t frequency = 0;
2794 uint8_t idx = 0;
2795 // The brand string buffer is at most 48 bytes.
2796 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2797 for (; idx < 48-2; ++idx) {
2798 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2799 // Search brand string for "yHz" where y is M, G, or T.
2800 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2801 if (brand_string[idx] == 'M') {
2802 multiplier = MEGA;
2803 } else if (brand_string[idx] == 'G') {
2804 multiplier = MEGA * 1000;
2805 } else if (brand_string[idx] == 'T') {
2806 multiplier = MEGA * MEGA;
2807 }
2808 break;
2809 }
2810 }
2811 if (multiplier > 0) {
2812 // Compute frequency (in Hz) from brand string.
2813 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2814 frequency = (brand_string[idx-4] - '0') * multiplier;
2815 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2816 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2817 } else { // format is "xxxx"
2818 frequency = (brand_string[idx-4] - '0') * 1000;
2819 frequency += (brand_string[idx-3] - '0') * 100;
2820 frequency += (brand_string[idx-2] - '0') * 10;
2821 frequency += (brand_string[idx-1] - '0');
2822 frequency *= multiplier;
2823 }
2824 }
2825 return frequency;
2826 }
2827
2828
2829 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2830 if (_max_qualified_cpu_frequency == 0) {
2831 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2832 }
2833 return _max_qualified_cpu_frequency;
2834 }
2835
2836 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2837 VM_Features vm_features;
2838
2839 // check the features that must be present
2840 guarantee(std_cpuid1_edx.bits.sse2 != 0, "sse2 is not supported");
2841 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
2842 // clflush_size is size in quadwords (8 bytes).
2843 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == ICache::line_size/8, "clflush size is not supported");
2844
2845 // sse and sse2 are guaranteed to be present
2846 vm_features.set_feature(CPU_SSE);
2847 vm_features.set_feature(CPU_SSE2);
2848
2849 if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2850 vm_features.set_feature(CPU_CX8);
2851 if (std_cpuid1_edx.bits.cmov != 0)
2852 vm_features.set_feature(CPU_CMOV);
2853 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2854 ext_cpuid1_edx.bits.fxsr != 0))
2855 vm_features.set_feature(CPU_FXSR);
2856 // HT flag is set for multi-core processors also.
2857 if (threads_per_core() > 1)
2858 vm_features.set_feature(CPU_HT);
2859 if (std_cpuid1_ecx.bits.sse3 != 0)
2860 vm_features.set_feature(CPU_SSE3);
2861 if (std_cpuid1_ecx.bits.ssse3 != 0)
2862 vm_features.set_feature(CPU_SSSE3);
2863 if (std_cpuid1_ecx.bits.sse4_1 != 0)
2864 vm_features.set_feature(CPU_SSE4_1);
2865 if (std_cpuid1_ecx.bits.sse4_2 != 0)
2866 vm_features.set_feature(CPU_SSE4_2);
2867 if (std_cpuid1_ecx.bits.popcnt != 0)
2868 vm_features.set_feature(CPU_POPCNT);
2869 if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2870 xem_xcr0_eax.bits.apx_f != 0 &&
2871 std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2872 vm_features.set_feature(CPU_APX_F);
2873 }
2874 if (std_cpuid1_ecx.bits.avx != 0 &&
2875 std_cpuid1_ecx.bits.osxsave != 0 &&
2876 xem_xcr0_eax.bits.sse != 0 &&
2877 xem_xcr0_eax.bits.ymm != 0) {
2878 vm_features.set_feature(CPU_AVX);
2879 vm_features.set_feature(CPU_VZEROUPPER);
2880 if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2881 vm_features.set_feature(CPU_SHA512);
2882 if (std_cpuid1_ecx.bits.f16c != 0)
2883 vm_features.set_feature(CPU_F16C);
2884 if (sef_cpuid7_ebx.bits.avx2 != 0) {
2885 vm_features.set_feature(CPU_AVX2);
2886 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2887 vm_features.set_feature(CPU_AVX_IFMA);
2888 }
2889 if (sef_cpuid7_ecx.bits.gfni != 0)
2890 vm_features.set_feature(CPU_GFNI);
2891 if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2892 xem_xcr0_eax.bits.opmask != 0 &&
2893 xem_xcr0_eax.bits.zmm512 != 0 &&
2894 xem_xcr0_eax.bits.zmm32 != 0) {
2895 vm_features.set_feature(CPU_AVX512F);
2896 if (sef_cpuid7_ebx.bits.avx512cd != 0)
2897 vm_features.set_feature(CPU_AVX512CD);
2898 if (sef_cpuid7_ebx.bits.avx512dq != 0)
2899 vm_features.set_feature(CPU_AVX512DQ);
2900 if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2901 vm_features.set_feature(CPU_AVX512_IFMA);
2902 if (sef_cpuid7_ebx.bits.avx512pf != 0)
2903 vm_features.set_feature(CPU_AVX512PF);
2904 if (sef_cpuid7_ebx.bits.avx512er != 0)
2905 vm_features.set_feature(CPU_AVX512ER);
2906 if (sef_cpuid7_ebx.bits.avx512bw != 0)
2907 vm_features.set_feature(CPU_AVX512BW);
2908 if (sef_cpuid7_ebx.bits.avx512vl != 0)
2909 vm_features.set_feature(CPU_AVX512VL);
2910 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2911 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2912 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2913 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2914 if (sef_cpuid7_ecx.bits.vaes != 0)
2915 vm_features.set_feature(CPU_AVX512_VAES);
2916 if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2917 vm_features.set_feature(CPU_AVX512_VNNI);
2918 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2919 vm_features.set_feature(CPU_AVX512_BITALG);
2920 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2921 vm_features.set_feature(CPU_AVX512_VBMI);
2922 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2923 vm_features.set_feature(CPU_AVX512_VBMI2);
2924 }
2925 if (is_intel()) {
2926 if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2927 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2928 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2929 xem_xcr0_eax.bits.opmask != 0 &&
2930 xem_xcr0_eax.bits.zmm512 != 0 &&
2931 xem_xcr0_eax.bits.zmm32 != 0) {
2932 vm_features.set_feature(CPU_AVX10_1);
2933 vm_features.set_feature(CPU_AVX512F);
2934 vm_features.set_feature(CPU_AVX512CD);
2935 vm_features.set_feature(CPU_AVX512DQ);
2936 vm_features.set_feature(CPU_AVX512PF);
2937 vm_features.set_feature(CPU_AVX512ER);
2938 vm_features.set_feature(CPU_AVX512BW);
2939 vm_features.set_feature(CPU_AVX512VL);
2940 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2941 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2942 vm_features.set_feature(CPU_AVX512_VAES);
2943 vm_features.set_feature(CPU_AVX512_VNNI);
2944 vm_features.set_feature(CPU_AVX512_BITALG);
2945 vm_features.set_feature(CPU_AVX512_VBMI);
2946 vm_features.set_feature(CPU_AVX512_VBMI2);
2947 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2948 vm_features.set_feature(CPU_AVX10_2);
2949 }
2950 }
2951 }
2952 }
2953
2954 if (std_cpuid1_ecx.bits.hv != 0)
2955 vm_features.set_feature(CPU_HV);
2956 if (sef_cpuid7_ebx.bits.bmi1 != 0)
2957 vm_features.set_feature(CPU_BMI1);
2958 if (std_cpuid1_edx.bits.tsc != 0)
2959 vm_features.set_feature(CPU_TSC);
2960 if (ext_cpuid7_edx.bits.tsc_invariance != 0)
2961 vm_features.set_feature(CPU_TSCINV_BIT);
2962 if (std_cpuid1_ecx.bits.aes != 0)
2963 vm_features.set_feature(CPU_AES);
2964 if (ext_cpuid1_ecx.bits.lzcnt != 0)
2965 vm_features.set_feature(CPU_LZCNT);
2966 if (ext_cpuid1_ecx.bits.prefetchw != 0)
2967 vm_features.set_feature(CPU_3DNOW_PREFETCH);
2968 if (sef_cpuid7_ebx.bits.erms != 0)
2969 vm_features.set_feature(CPU_ERMS);
2970 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2971 vm_features.set_feature(CPU_FSRM);
2972 if (std_cpuid1_ecx.bits.clmul != 0)
2973 vm_features.set_feature(CPU_CLMUL);
2974 if (sef_cpuid7_ebx.bits.rtm != 0)
2975 vm_features.set_feature(CPU_RTM);
2976 if (sef_cpuid7_ebx.bits.adx != 0)
2977 vm_features.set_feature(CPU_ADX);
2978 if (sef_cpuid7_ebx.bits.bmi2 != 0)
2979 vm_features.set_feature(CPU_BMI2);
2980 if (sef_cpuid7_ebx.bits.sha != 0)
2981 vm_features.set_feature(CPU_SHA);
2982 if (std_cpuid1_ecx.bits.fma != 0)
2983 vm_features.set_feature(CPU_FMA);
2984 if (sef_cpuid7_ebx.bits.clflushopt != 0)
2985 vm_features.set_feature(CPU_FLUSHOPT);
2986 if (sef_cpuid7_ebx.bits.clwb != 0)
2987 vm_features.set_feature(CPU_CLWB);
2988 if (ext_cpuid1_edx.bits.rdtscp != 0)
2989 vm_features.set_feature(CPU_RDTSCP);
2990 if (sef_cpuid7_ecx.bits.rdpid != 0)
2991 vm_features.set_feature(CPU_RDPID);
2992
2993 // AMD|Hygon additional features.
2994 if (is_amd_family()) {
2995 // PREFETCHW was checked above, check TDNOW here.
2996 if ((ext_cpuid1_edx.bits.tdnow != 0))
2997 vm_features.set_feature(CPU_3DNOW_PREFETCH);
2998 if (ext_cpuid1_ecx.bits.sse4a != 0)
2999 vm_features.set_feature(CPU_SSE4A);
3000 }
3001
3002 // Intel additional features.
3003 if (is_intel()) {
3004 if (sef_cpuid7_edx.bits.serialize != 0)
3005 vm_features.set_feature(CPU_SERIALIZE);
3006 if (sef_cpuid7_edx.bits.hybrid != 0)
3007 vm_features.set_feature(CPU_HYBRID);
3008 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3009 vm_features.set_feature(CPU_AVX512_FP16);
3010 }
3011
3012 // ZX additional features.
3013 if (is_zx()) {
3014 // We do not know if these are supported by ZX, so we cannot trust
3015 // common CPUID bit for them.
3016 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3017 vm_features.clear_feature(CPU_CLWB);
3018 }
3019
3020 // Protection key features.
3021 if (sef_cpuid7_ecx.bits.pku != 0) {
3022 vm_features.set_feature(CPU_PKU);
3023 }
3024 if (sef_cpuid7_ecx.bits.ospke != 0) {
3025 vm_features.set_feature(CPU_OSPKE);
3026 }
3027
3028 // Control flow enforcement (CET) features.
3029 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3030 vm_features.set_feature(CPU_CET_SS);
3031 }
3032 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3033 vm_features.set_feature(CPU_CET_IBT);
3034 }
3035
3036 // Composite features.
3037 if (supports_tscinv_bit() &&
3038 ((is_amd_family() && !is_amd_Barcelona()) ||
3039 is_intel_tsc_synched_at_init())) {
3040 vm_features.set_feature(CPU_TSCINV);
3041 }
3042 return vm_features;
3043 }
3044
3045 bool VM_Version::os_supports_avx_vectors() {
3046 bool retVal = false;
3047 int nreg = 4;
3048 if (supports_evex()) {
3049 // Verify that OS save/restore all bits of EVEX registers
3050 // during signal processing.
3051 retVal = true;
3052 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3053 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3054 retVal = false;
3055 break;
3056 }
3057 }
3058 } else if (supports_avx()) {
3059 // Verify that OS save/restore all bits of AVX registers
3060 // during signal processing.
3061 retVal = true;
3062 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3063 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3064 retVal = false;
3065 break;
3066 }
3067 }
3068 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3069 if (retVal == false) {
3070 // Verify that OS save/restore all bits of EVEX registers
3071 // during signal processing.
3072 retVal = true;
3073 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3074 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3075 retVal = false;
3076 break;
3077 }
3078 }
3079 }
3080 }
3081 return retVal;
3082 }
3083
3084 bool VM_Version::os_supports_apx_egprs() {
3085 if (!supports_apx_f()) {
3086 return false;
3087 }
3088 if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3089 _cpuid_info.apx_save[1] != egpr_test_value()) {
3090 return false;
3091 }
3092 return true;
3093 }
3094
3095 uint VM_Version::cores_per_cpu() {
3096 uint result = 1;
3097 if (is_intel()) {
3098 bool supports_topology = supports_processor_topology();
3099 if (supports_topology) {
3100 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3101 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3102 }
3103 if (!supports_topology || result == 0) {
3104 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3105 }
3106 } else if (is_amd_family()) {
3107 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3108 if (cpu_family() >= 0x17) { // Zen or later
3109 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3110 }
3111 } else if (is_zx()) {
3112 bool supports_topology = supports_processor_topology();
3113 if (supports_topology) {
3114 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3115 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3116 }
3117 if (!supports_topology || result == 0) {
3118 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3119 }
3120 }
3121 return result;
3122 }
3123
3124 uint VM_Version::threads_per_core() {
3125 uint result = 1;
3126 if (is_intel() && supports_processor_topology()) {
3127 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3128 } else if (is_zx() && supports_processor_topology()) {
3129 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3130 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3131 if (cpu_family() >= 0x17) {
3132 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3133 } else {
3134 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3135 cores_per_cpu();
3136 }
3137 }
3138 return (result == 0 ? 1 : result);
3139 }
3140
3141 uint VM_Version::L1_line_size() {
3142 uint result = 0;
3143 if (is_intel()) {
3144 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3145 } else if (is_amd_family()) {
3146 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3147 } else if (is_zx()) {
3148 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3149 }
3150 if (result < 32) // not defined ?
3151 result = 32; // 32 bytes by default on x86 and other x64
3152 return result;
3153 }
3154
3155 bool VM_Version::is_intel_tsc_synched_at_init() {
3156 if (is_intel_family_core()) {
3157 uint32_t ext_model = extended_cpu_model();
3158 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3159 ext_model == CPU_MODEL_WESTMERE_EP ||
3160 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3161 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3162 // <= 2-socket invariant tsc support. EX versions are usually used
3163 // in > 2-socket systems and likely don't synchronize tscs at
3164 // initialization.
3165 // Code that uses tsc values must be prepared for them to arbitrarily
3166 // jump forward or backward.
3167 return true;
3168 }
3169 }
3170 return false;
3171 }
3172
3173 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3174 // Hardware prefetching (distance/size in bytes):
3175 // Pentium 3 - 64 / 32
3176 // Pentium 4 - 256 / 128
3177 // Athlon - 64 / 32 ????
3178 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3179 // Core - 128 / 64
3180 //
3181 // Software prefetching (distance in bytes / instruction with best score):
3182 // Pentium 3 - 128 / prefetchnta
3183 // Pentium 4 - 512 / prefetchnta
3184 // Athlon - 128 / prefetchnta
3185 // Opteron - 256 / prefetchnta
3186 // Core - 256 / prefetchnta
3187 // It will be used only when AllocatePrefetchStyle > 0
3188
3189 if (is_amd_family()) { // AMD | Hygon
3190 return 256; // Opteron
3191 } else if (is_zx()) {
3192 return 256;
3193 } else { // Intel
3194 if (supports_sse3() && is_intel_server_family()) {
3195 if (is_intel_modern_cpu()) { // Nehalem based cpus
3196 return 192;
3197 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3198 return 384;
3199 }
3200 }
3201 if (is_intel_server_family()) {
3202 return 256; // Pentium M, Core, Core2
3203 } else {
3204 return 512; // Pentium 4
3205 }
3206 }
3207 }
3208
3209 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3210 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3211 switch (id) {
3212 case vmIntrinsics::_floatToFloat16:
3213 case vmIntrinsics::_float16ToFloat:
3214 if (!supports_float16()) {
3215 return false;
3216 }
3217 break;
3218 default:
3219 break;
3220 }
3221 return true;
3222 }
3223
3224 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3225 int i = 0;
3226 ss.join([&]() {
3227 const char* str = nullptr;
3228 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3229 if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3230 str = _features_names[i];
3231 }
3232 i += 1;
3233 }
3234 return str;
3235 }, ", ");
3236 }
3237
3238 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3239 VM_Features* features = (VM_Features*)features_buffer;
3240 insert_features_names(*features, ss);
3241 }
3242
3243 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3244 VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3245 VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3246 int i = 0;
3247 ss.join([&]() {
3248 const char* str = nullptr;
3249 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3250 Feature_Flag flag = (Feature_Flag)i;
3251 if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3252 str = _features_names[i];
3253 }
3254 i += 1;
3255 }
3256 return str;
3257 }, ", ");
3258 }
3259
3260 int VM_Version::cpu_features_size() {
3261 return sizeof(VM_Features);
3262 }
3263
3264 void VM_Version::store_cpu_features(void* buf) {
3265 VM_Features copy = _features.aot_code_cache_features();
3266 memcpy(buf, ©, sizeof(VM_Features));
3267 }
3268
3269 bool VM_Version::verify_aot_code_cache_features(void* features_buffer) {
3270 VM_Features* features_to_test = (VM_Features*)features_buffer;
3271 VM_Features rt_features = _features.aot_code_cache_features();
3272 return rt_features.verify_aot_code_cache_features(features_to_test);
3273 }