1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/codeBlob.hpp"
29 #include "compiler/compilerDefinitions.inline.hpp"
30 #include "jvm.h"
31 #include "logging/log.hpp"
32 #include "logging/logStream.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "runtime/globals_extension.hpp"
36 #include "runtime/java.hpp"
37 #include "runtime/os.inline.hpp"
38 #include "runtime/stubCodeGenerator.hpp"
39 #include "runtime/vm_version.hpp"
40 #include "utilities/checkedCast.hpp"
41 #include "utilities/ostream.hpp"
42 #include "utilities/powerOfTwo.hpp"
43 #include "utilities/virtualizationSupport.hpp"
44
// Raw processor identity, filled in by get_processor_features().
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
// Aggregate of all raw CPUID leaf results; populated by the generated
// get_cpu_info stub below.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into a parallel array of feature-name strings,
// indexed consistently with the feature enum.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code buffer holding the generated detection stubs; stub_size is its
// capacity in bytes.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// Number of 64-bit words in the feature bitmap.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// Entry-point types for the stubs generated by VM_Version_StubGenerator.
extern "C" {
typedef void (*get_cpu_info_stub_t)(void*);
typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
typedef void (*clear_apx_test_state_t)(void);
typedef void (*getCPUIDBrandString_stub_t)(void*);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
82
83 bool VM_Version::supports_clflush() {
84 // clflush should always be available on x86_64
85 // if not we are in real trouble because we rely on it
86 // to flush the code cache.
87 // Unfortunately, Assembler::clflush is currently called as part
88 // of generation of the code cache flush routine. This happens
89 // under Universe::init before the processor features are set
90 // up. Assembler::flush calls this routine to check that clflush
91 // is allowed. So, we give the caller a free pass if Universe init
92 // is still in progress.
93 assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
94 return true;
95 }
96
// CPUID leaf (function) numbers. The extended leaves 0x80000002..0x80000004
// are used by generate_getCPUIDBrandString() to read the brand string.
#define CPUID_STANDARD_FN 0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN 0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
109
// Generator for the one-shot CPU-detection stubs. Each generate_* method
// emits a small hand-written assembly routine that is executed exactly once
// during VM startup to populate VM_Version::_cpuid_info and to probe how the
// OS handles extended register state across signals.
class VM_Version_StubGenerator: public StubCodeGenerator {
public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emits a stub that zeroes the extended GPRs r16 and r31 before the APX
  // SEGV probe in generate_get_cpu_info(): after that, any non-zero value
  // observed in them post-signal must have been restored by the OS rather
  // than simply left untouched.
  address clear_apx_test_state() {
# define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    // UseAPX is temporarily forced on so the assembler accepts EGPR operands.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Emits the main detection stub:
  //
  //   void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
  //
  // The stub first distinguishes pre-CPUID processors (386/486) by toggling
  // EFLAGS.AC and EFLAGS.ID, then executes the relevant CPUID leaves into
  // *cpuid_info, and finally probes whether the OS restores extended register
  // state (YMM/ZMM and, when present, APX EGPRs) across a signal by
  // deliberately triggering a SEGV and checking register contents afterwards.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    // EVEX paths are taken when UseAVX is unset (default) or explicitly > 2.
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
    // NOTE(review): apx_save_restore_warning and apx_xstate are declared but
    // never bound or referenced in this stub.

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
                                      // value of at least 1, we give up and
                                      // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx); // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1); // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2); // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    // Dispatch on the maximum supported extended leaf (returned in EAX) to
    // the highest leaf block we can safely query; blocks below are laid out
    // highest-leaf-first and fall through to the lower ones.
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX probe: load test values into EGPRs, take a SEGV (null load), and
    // record the register contents after the signal handler returns so the
    // caller can verify the OS preserved extended GPR state.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    //
    // Query CPUID 0xD.19 for APX XSAVE offset
    // Extended State Enumeration Sub-leaf 19 (APX)
    // EAX = size of APX state (should be 128)
    // EBX = offset in standard XSAVE format
    //
    __ movl(rax, 0xD);
    __ movl(rcx, 19);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
    __ movl(Address(rsi, 0), rbx);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore xmm registers saved above, in reverse order of the saves.
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore xmm registers saved above, in reverse order of the saves.
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };

  // Emits a vzeroupper, but only on Intel ('Genu' vendor id) parts that are
  // not Xeon Phi 3200/5200/7200 or Future Xeon Phi (detected via the
  // cpuid(0x1) signature); all other CPUs branch straight to L_wrapup.
  void generate_vzeroupper(Label& L_wrapup) {
# define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
# undef __
  }

  // Emits a stub that forwards a single CPUID query:
  //
  //   void detect_virt(uint32_t leaf, uint32_t regs[4]);
  //
  // Executes CPUID with EAX = leaf and stores eax/ebx/ecx/edx into regs.
  // Used through detect_virt_stub, presumably to probe hypervisor leaves.
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
# define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };


  // Emits a stub that fills the CpuidInfo proc_name_0..proc_name_11 fields
  // (the 48-byte processor brand string) from CPUID leaves
  // 0x80000002..0x80000004:
  //
  //   void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
  //
  // On pre-CPUID chips, or when the brand-string leaves are unsupported,
  // the stub returns without writing any proc_name fields.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
                                      // value of at least 1, we give up and
                                      // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };
};
874
875 void VM_Version::get_processor_features() {
876
877 _cpu = 4; // 486 by default
878 _model = 0;
879 _stepping = 0;
880 _logical_processors_per_package = 1;
881 // i486 internal cache is both I&D and has a 16-byte line size
882 _L1_data_cache_line_size = 16;
883
884 // Get raw processor info
885
886 get_cpu_info_stub(&_cpuid_info);
887
888 assert_is_initialized();
889 _cpu = extended_cpu_family();
890 _model = extended_cpu_model();
891 _stepping = cpu_stepping();
892
893 if (cpu_family() > 4) { // it supports CPUID
894 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
895 _cpu_features = _features; // Preserve features
896 // Logical processors are only available on P4s and above,
897 // and only if hyperthreading is available.
898 _logical_processors_per_package = logical_processor_count();
899 _L1_data_cache_line_size = L1_line_size();
900 }
901
902 // xchg and xadd instructions
903 _supports_atomic_getset4 = true;
904 _supports_atomic_getadd4 = true;
905 _supports_atomic_getset8 = true;
906 _supports_atomic_getadd8 = true;
907
908 // OS should support SSE for x64 and hardware should support at least SSE2.
909 if (!VM_Version::supports_sse2()) {
910 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
911 }
912 // in 64 bit the use of SSE2 is the minimum
913 if (UseSSE < 2) UseSSE = 2;
914
915 // flush_icache_stub have to be generated first.
916 // That is why Icache line size is hard coded in ICache class,
917 // see icache_x86.hpp. It is also the reason why we can't use
918 // clflush instruction in 32-bit VM since it could be running
919 // on CPU which does not support it.
920 //
921 // The only thing we can do is to verify that flushed
922 // ICache::line_size has correct value.
923 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
924 // clflush_size is size in quadwords (8 bytes).
925 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
926
927 // assigning this field effectively enables Unsafe.writebackMemory()
928 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
929 // that is only implemented on x86_64 and only if the OS plays ball
930 if (os::supports_map_sync()) {
931 // publish data cache line flush size to generic field, otherwise
    // let it default to zero, thereby disabling writeback
933 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
934 }
935
936 // Check if processor has Intel Ecore
937 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
938 (supports_hybrid() ||
939 _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
940 _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
941 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
942 }
943
944 if (UseSSE < 4) {
945 _features.clear_feature(CPU_SSE4_1);
946 _features.clear_feature(CPU_SSE4_2);
947 }
948
949 if (UseSSE < 3) {
950 _features.clear_feature(CPU_SSE3);
951 _features.clear_feature(CPU_SSSE3);
952 _features.clear_feature(CPU_SSE4A);
953 }
954
955 if (UseSSE < 2)
956 _features.clear_feature(CPU_SSE2);
957
958 if (UseSSE < 1)
959 _features.clear_feature(CPU_SSE);
960
  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
962 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
963 UseAVX = 0;
964 }
965
966 // UseSSE is set to the smaller of what hardware supports and what
967 // the command line requires. I.e., you cannot set UseSSE to 2 on
968 // older Pentiums which do not support it.
969 int use_sse_limit = 0;
970 if (UseSSE > 0) {
971 if (UseSSE > 3 && supports_sse4_1()) {
972 use_sse_limit = 4;
973 } else if (UseSSE > 2 && supports_sse3()) {
974 use_sse_limit = 3;
975 } else if (UseSSE > 1 && supports_sse2()) {
976 use_sse_limit = 2;
977 } else if (UseSSE > 0 && supports_sse()) {
978 use_sse_limit = 1;
979 } else {
980 use_sse_limit = 0;
981 }
982 }
983 if (FLAG_IS_DEFAULT(UseSSE)) {
984 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
985 } else if (UseSSE > use_sse_limit) {
986 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
987 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
988 }
989
990 // first try initial setting and detect what we can support
991 int use_avx_limit = 0;
992 if (UseAVX > 0) {
993 if (UseSSE < 4) {
994 // Don't use AVX if SSE is unavailable or has been disabled.
995 use_avx_limit = 0;
996 } else if (UseAVX > 2 && supports_evex()) {
997 use_avx_limit = 3;
998 } else if (UseAVX > 1 && supports_avx2()) {
999 use_avx_limit = 2;
1000 } else if (UseAVX > 0 && supports_avx()) {
1001 use_avx_limit = 1;
1002 } else {
1003 use_avx_limit = 0;
1004 }
1005 }
1006 if (FLAG_IS_DEFAULT(UseAVX)) {
1007 // Don't use AVX-512 on older Skylakes unless explicitly requested.
1008 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
1009 FLAG_SET_DEFAULT(UseAVX, 2);
1010 } else {
1011 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1012 }
1013 }
1014
1015 if (UseAVX > use_avx_limit) {
1016 if (UseSSE < 4) {
1017 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1018 } else {
1019 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1020 }
1021 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1022 }
1023
1024 if (UseAVX < 3) {
1025 _features.clear_feature(CPU_AVX512F);
1026 _features.clear_feature(CPU_AVX512DQ);
1027 _features.clear_feature(CPU_AVX512CD);
1028 _features.clear_feature(CPU_AVX512BW);
1029 _features.clear_feature(CPU_AVX512ER);
1030 _features.clear_feature(CPU_AVX512PF);
1031 _features.clear_feature(CPU_AVX512VL);
1032 _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1033 _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1034 _features.clear_feature(CPU_AVX512_VAES);
1035 _features.clear_feature(CPU_AVX512_VNNI);
1036 _features.clear_feature(CPU_AVX512_VBMI);
1037 _features.clear_feature(CPU_AVX512_VBMI2);
1038 _features.clear_feature(CPU_AVX512_BITALG);
1039 _features.clear_feature(CPU_AVX512_IFMA);
1040 _features.clear_feature(CPU_APX_F);
1041 _features.clear_feature(CPU_AVX512_FP16);
1042 _features.clear_feature(CPU_AVX10_1);
1043 _features.clear_feature(CPU_AVX10_2);
1044 }
1045
1046
1047 if (UseAVX < 2) {
1048 _features.clear_feature(CPU_AVX2);
1049 _features.clear_feature(CPU_AVX_IFMA);
1050 }
1051
1052 if (UseAVX < 1) {
1053 _features.clear_feature(CPU_AVX);
1054 _features.clear_feature(CPU_VZEROUPPER);
1055 _features.clear_feature(CPU_F16C);
1056 _features.clear_feature(CPU_SHA512);
1057 }
1058
1059 if (logical_processors_per_package() == 1) {
1060 // HT processor could be installed on a system which doesn't support HT.
1061 _features.clear_feature(CPU_HT);
1062 }
1063
1064 if (is_intel()) { // Intel cpus specific settings
1065 if (is_knights_family()) {
1066 _features.clear_feature(CPU_VZEROUPPER);
1067 _features.clear_feature(CPU_AVX512BW);
1068 _features.clear_feature(CPU_AVX512VL);
1069 _features.clear_feature(CPU_APX_F);
1070 _features.clear_feature(CPU_AVX512DQ);
1071 _features.clear_feature(CPU_AVX512_VNNI);
1072 _features.clear_feature(CPU_AVX512_VAES);
1073 _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1074 _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1075 _features.clear_feature(CPU_AVX512_VBMI);
1076 _features.clear_feature(CPU_AVX512_VBMI2);
1077 _features.clear_feature(CPU_CLWB);
1078 _features.clear_feature(CPU_FLUSHOPT);
1079 _features.clear_feature(CPU_GFNI);
1080 _features.clear_feature(CPU_AVX512_BITALG);
1081 _features.clear_feature(CPU_AVX512_IFMA);
1082 _features.clear_feature(CPU_AVX_IFMA);
1083 _features.clear_feature(CPU_AVX512_FP16);
1084 _features.clear_feature(CPU_AVX10_1);
1085 _features.clear_feature(CPU_AVX10_2);
1086 }
1087 }
1088
1089 // Currently APX support is only enabled for targets supporting AVX512VL feature.
1090 bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1091 if (UseAPX && !apx_supported) {
1092 warning("UseAPX is not supported on this CPU, setting it to false");
1093 FLAG_SET_DEFAULT(UseAPX, false);
1094 }
1095
1096 if (!UseAPX) {
1097 _features.clear_feature(CPU_APX_F);
1098 }
1099
1100 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1101 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1102 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1103 } else {
1104 _has_intel_jcc_erratum = IntelJccErratumMitigation;
1105 }
1106
1107 assert(supports_clflush(), "Always present");
1108 if (X86ICacheSync == -1) {
1109 // Auto-detect, choosing the best performant one that still flushes
1110 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1111 if (supports_clwb()) {
1112 FLAG_SET_ERGO(X86ICacheSync, 3);
1113 } else if (supports_clflushopt()) {
1114 FLAG_SET_ERGO(X86ICacheSync, 2);
1115 } else {
1116 FLAG_SET_ERGO(X86ICacheSync, 1);
1117 }
1118 } else {
1119 if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1120 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1121 }
1122 if ((X86ICacheSync == 3) && !supports_clwb()) {
1123 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1124 }
1125 if ((X86ICacheSync == 5) && !supports_serialize()) {
1126 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1127 }
1128 }
1129
1130 stringStream ss(2048);
1131 if (supports_hybrid()) {
1132 ss.print("(hybrid)");
1133 } else {
1134 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1135 }
1136 ss.print(" family %d model %d stepping %d microcode 0x%x",
1137 cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1138 ss.print(", ");
1139 int features_offset = (int)ss.size();
1140 insert_features_names(_features, ss);
1141
1142 _cpu_info_string = ss.as_string(true);
1143 _features_string = _cpu_info_string + features_offset;
1144
1145 // Use AES instructions if available.
1146 if (supports_aes()) {
1147 if (FLAG_IS_DEFAULT(UseAES)) {
1148 FLAG_SET_DEFAULT(UseAES, true);
1149 }
1150 if (!UseAES) {
1151 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1152 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1153 }
1154 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1155 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1156 warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1157 }
1158 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1159 } else {
1160 if (UseSSE > 2) {
1161 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1162 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1163 }
1164 } else {
1165 // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for the instructions it uses.
1167 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1168 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1169 }
1170 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1171 }
1172
1173 // --AES-CTR begins--
1174 if (!UseAESIntrinsics) {
1175 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1176 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1177 }
1178 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1179 } else {
1180 if (supports_sse4_1()) {
1181 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1182 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1183 }
1184 } else {
1185 // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for the instructions it uses.
1187 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1188 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1189 }
1190 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1191 }
1192 }
1193 // --AES-CTR ends--
1194 }
1195 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1196 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1197 warning("AES instructions are not available on this CPU");
1198 }
1199 FLAG_SET_DEFAULT(UseAES, false);
1200 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1201 warning("AES intrinsics are not available on this CPU");
1202 }
1203 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1204 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1205 warning("AES-CTR intrinsics are not available on this CPU");
1206 }
1207 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1208 }
1209
1210 // Use CLMUL instructions if available.
1211 if (supports_clmul()) {
1212 if (FLAG_IS_DEFAULT(UseCLMUL)) {
1213 UseCLMUL = true;
1214 }
1215 } else if (UseCLMUL) {
1216 if (!FLAG_IS_DEFAULT(UseCLMUL))
1217 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1218 FLAG_SET_DEFAULT(UseCLMUL, false);
1219 }
1220
1221 if (UseCLMUL && (UseSSE > 2)) {
1222 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1223 UseCRC32Intrinsics = true;
1224 }
1225 } else if (UseCRC32Intrinsics) {
1226 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1227 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1228 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1229 }
1230
1231 if (supports_avx2()) {
1232 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1233 UseAdler32Intrinsics = true;
1234 }
1235 } else if (UseAdler32Intrinsics) {
1236 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1237 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1238 }
1239 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1240 }
1241
1242 if (supports_sse4_2() && supports_clmul()) {
1243 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1244 UseCRC32CIntrinsics = true;
1245 }
1246 } else if (UseCRC32CIntrinsics) {
1247 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1248 warning("CRC32C intrinsics are not available on this CPU");
1249 }
1250 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1251 }
1252
1253 // GHASH/GCM intrinsics
1254 if (UseCLMUL && (UseSSE > 2)) {
1255 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1256 UseGHASHIntrinsics = true;
1257 }
1258 } else if (UseGHASHIntrinsics) {
1259 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1260 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1261 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1262 }
1263
1264 // ChaCha20 Intrinsics
1265 // As long as the system supports AVX as a baseline we can do a
1266 // SIMD-enabled block function. StubGenerator makes the determination
1267 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1268 // version.
1269 if (UseAVX >= 1) {
1270 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1271 UseChaCha20Intrinsics = true;
1272 }
1273 } else if (UseChaCha20Intrinsics) {
1274 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1275 warning("ChaCha20 intrinsic requires AVX instructions");
1276 }
1277 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1278 }
1279
1280 // Kyber Intrinsics
1281 // Currently we only have them for AVX512
1282 if (supports_evex() && supports_avx512bw()) {
1283 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1284 UseKyberIntrinsics = true;
1285 }
1286 } else
1287 if (UseKyberIntrinsics) {
1288 warning("Intrinsics for ML-KEM are not available on this CPU.");
1289 FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1290 }
1291
1292 // Dilithium Intrinsics
1293 if (UseAVX > 1) {
1294 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1295 UseDilithiumIntrinsics = true;
1296 }
1297 } else if (UseDilithiumIntrinsics) {
1298 warning("Intrinsics for ML-DSA are not available on this CPU.");
1299 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1300 }
1301
1302 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1303 if (UseAVX >= 2) {
1304 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1305 UseBASE64Intrinsics = true;
1306 }
1307 } else if (UseBASE64Intrinsics) {
1308 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1309 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1310 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1311 }
1312
1313 if (supports_fma()) {
1314 if (FLAG_IS_DEFAULT(UseFMA)) {
1315 UseFMA = true;
1316 }
1317 } else if (UseFMA) {
1318 warning("FMA instructions are not available on this CPU");
1319 FLAG_SET_DEFAULT(UseFMA, false);
1320 }
1321
1322 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1323 UseMD5Intrinsics = true;
1324 }
1325
1326 if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1327 if (FLAG_IS_DEFAULT(UseSHA)) {
1328 UseSHA = true;
1329 }
1330 } else if (UseSHA) {
1331 warning("SHA instructions are not available on this CPU");
1332 FLAG_SET_DEFAULT(UseSHA, false);
1333 }
1334
1335 if (supports_sha() && supports_sse4_1() && UseSHA) {
1336 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1337 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1338 }
1339 } else if (UseSHA1Intrinsics) {
1340 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1341 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1342 }
1343
1344 if (supports_sse4_1() && UseSHA) {
1345 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1346 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1347 }
1348 } else if (UseSHA256Intrinsics) {
1349 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1350 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1351 }
1352
1353 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1354 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1355 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1356 }
1357 } else if (UseSHA512Intrinsics) {
1358 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1359 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1360 }
1361
1362 if (UseSHA && supports_evex() && supports_avx512bw()) {
1363 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1364 FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1365 }
1366 } else if (UseSHA3Intrinsics) {
1367 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1368 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1369 }
1370
1371 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics || UseSHA3Intrinsics)) {
1372 FLAG_SET_DEFAULT(UseSHA, false);
1373 }
1374
1375 #if COMPILER2_OR_JVMCI
1376 int max_vector_size = 0;
1377 if (UseAVX == 0 || !os_supports_avx_vectors()) {
1378 // 16 byte vectors (in XMM) are supported with SSE2+
1379 max_vector_size = 16;
1380 } else if (UseAVX == 1 || UseAVX == 2) {
1381 // 32 bytes vectors (in YMM) are only supported with AVX+
1382 max_vector_size = 32;
1383 } else if (UseAVX > 2) {
1384 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1385 max_vector_size = 64;
1386 }
1387
1388 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1389
1390 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1391 if (MaxVectorSize < min_vector_size) {
1392 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1393 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1394 }
1395 if (MaxVectorSize > max_vector_size) {
1396 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1397 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1398 }
1399 if (!is_power_of_2(MaxVectorSize)) {
1400 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1401 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1402 }
1403 } else {
1404 // If default, use highest supported configuration
1405 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1406 }
1407
1408 #if defined(COMPILER2) && defined(ASSERT)
1409 if (MaxVectorSize > 0) {
1410 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1411 tty->print_cr("State of YMM registers after signal handle:");
1412 int nreg = 4;
1413 const char* ymm_name[4] = {"0", "7", "8", "15"};
1414 for (int i = 0; i < nreg; i++) {
1415 tty->print("YMM%s:", ymm_name[i]);
1416 for (int j = 7; j >=0; j--) {
1417 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1418 }
1419 tty->cr();
1420 }
1421 }
1422 }
1423 #endif // COMPILER2 && ASSERT
1424
1425 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1426 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1427 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1428 }
1429 } else if (UsePoly1305Intrinsics) {
1430 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1431 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1432 }
1433
1434 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1435 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1436 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1437 }
1438 } else if (UseIntPolyIntrinsics) {
1439 warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1440 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1441 }
1442
1443 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1444 UseMultiplyToLenIntrinsic = true;
1445 }
1446 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1447 UseSquareToLenIntrinsic = true;
1448 }
1449 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1450 UseMulAddIntrinsic = true;
1451 }
1452 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1453 UseMontgomeryMultiplyIntrinsic = true;
1454 }
1455 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1456 UseMontgomerySquareIntrinsic = true;
1457 }
1458 #endif // COMPILER2_OR_JVMCI
1459
1460 // On new cpus instructions which update whole XMM register should be used
1461 // to prevent partial register stall due to dependencies on high half.
1462 //
1463 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1464 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1465 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1466 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1467
1468
1469 if (is_zx()) { // ZX cpus specific settings
1470 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1471 UseStoreImmI16 = false; // don't use it on ZX cpus
1472 }
1473 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1474 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1475 // Use it on all ZX cpus
1476 UseAddressNop = true;
1477 }
1478 }
1479 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1480 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1481 }
1482 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1483 if (supports_sse3()) {
1484 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1485 } else {
1486 UseXmmRegToRegMoveAll = false;
1487 }
1488 }
1489 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1490 #ifdef COMPILER2
1491 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1492 // For new ZX cpus do the next optimization:
1493 // don't align the beginning of a loop if there are enough instructions
1494 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1495 // in current fetch line (OptoLoopAlignment) or the padding
1496 // is big (> MaxLoopPad).
1497 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1498 // generated NOP instructions. 11 is the largest size of one
1499 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1500 MaxLoopPad = 11;
1501 }
1502 #endif // COMPILER2
1503 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1504 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1505 }
1506 if (supports_sse4_2()) { // new ZX cpus
1507 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1508 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1509 }
1510 }
1511 }
1512
1513 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1514 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1515 }
1516 }
1517
1518 if (is_amd_family()) { // AMD cpus specific settings
1519 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1520 // Use it on new AMD cpus starting from Opteron.
1521 UseAddressNop = true;
1522 }
1523 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1524 // Use it on new AMD cpus starting from Opteron.
1525 UseNewLongLShift = true;
1526 }
1527 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1528 if (supports_sse4a()) {
1529 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1530 } else {
1531 UseXmmLoadAndClearUpper = false;
1532 }
1533 }
1534 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1535 if (supports_sse4a()) {
1536 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1537 } else {
1538 UseXmmRegToRegMoveAll = false;
1539 }
1540 }
1541 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1542 if (supports_sse4a()) {
1543 UseXmmI2F = true;
1544 } else {
1545 UseXmmI2F = false;
1546 }
1547 }
1548 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1549 if (supports_sse4a()) {
1550 UseXmmI2D = true;
1551 } else {
1552 UseXmmI2D = false;
1553 }
1554 }
1555
1556 // some defaults for AMD family 15h
1557 if (cpu_family() == 0x15) {
1558 // On family 15h processors default is no sw prefetch
1559 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1560 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1561 }
1562 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1563 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1564 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1565 }
1566 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1567 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1568 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1569 }
1570 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1571 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1572 }
1573 }
1574
1575 #ifdef COMPILER2
1576 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1577 // Limit vectors size to 16 bytes on AMD cpus < 17h.
1578 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1579 }
1580 #endif // COMPILER2
1581
1582 // Some defaults for AMD family >= 17h && Hygon family 18h
1583 if (cpu_family() >= 0x17) {
1584 // On family >=17h processors use XMM and UnalignedLoadStores
1585 // for Array Copy
1586 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1587 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1588 }
1589 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1590 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1591 }
1592 #ifdef COMPILER2
1593 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1594 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1595 }
1596 #endif
1597 }
1598 }
1599
1600 if (is_intel()) { // Intel cpus specific settings
1601 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1602 UseStoreImmI16 = false; // don't use it on Intel cpus
1603 }
1604 if (is_intel_server_family() || cpu_family() == 15) {
1605 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1606 // Use it on all Intel cpus starting from PentiumPro
1607 UseAddressNop = true;
1608 }
1609 }
1610 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1611 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1612 }
1613 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1614 if (supports_sse3()) {
1615 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1616 } else {
1617 UseXmmRegToRegMoveAll = false;
1618 }
1619 }
1620 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1621 #ifdef COMPILER2
1622 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1623 // For new Intel cpus do the next optimization:
1624 // don't align the beginning of a loop if there are enough instructions
1625 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1626 // in current fetch line (OptoLoopAlignment) or the padding
1627 // is big (> MaxLoopPad).
1628 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1629 // generated NOP instructions. 11 is the largest size of one
1630 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1631 MaxLoopPad = 11;
1632 }
1633 #endif // COMPILER2
1634
1635 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1636 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1637 }
1638 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1639 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1640 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1641 }
1642 }
1643 }
1644 if (is_atom_family() || is_knights_family()) {
1645 #ifdef COMPILER2
1646 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1647 OptoScheduling = true;
1648 }
1649 #endif
1650 if (supports_sse4_2()) { // Silvermont
1651 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1652 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1653 }
1654 }
1655 if (FLAG_IS_DEFAULT(UseIncDec)) {
1656 FLAG_SET_DEFAULT(UseIncDec, false);
1657 }
1658 }
1659 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1660 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1661 }
1662 }
1663
1664 #ifdef COMPILER2
1665 if (UseAVX > 2) {
1666 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1667 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1668 ArrayOperationPartialInlineSize != 0 &&
1669 ArrayOperationPartialInlineSize != 16 &&
1670 ArrayOperationPartialInlineSize != 32 &&
1671 ArrayOperationPartialInlineSize != 64)) {
1672 int inline_size = 0;
1673 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1674 inline_size = 64;
1675 } else if (MaxVectorSize >= 32) {
1676 inline_size = 32;
1677 } else if (MaxVectorSize >= 16) {
1678 inline_size = 16;
1679 }
1680 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1681 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1682 }
1683 ArrayOperationPartialInlineSize = inline_size;
1684 }
1685
1686 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1687 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1688 if (ArrayOperationPartialInlineSize) {
1689 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1690 } else {
1691 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1692 }
1693 }
1694 }
1695
1696 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1697 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1698 OptimizeFill = false;
1699 }
1700 }
1701 #endif
1702 if (supports_sse4_2()) {
1703 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1704 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1705 }
1706 } else {
1707 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1708 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1709 }
1710 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1711 }
1712 if (UseSSE42Intrinsics) {
1713 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1714 UseVectorizedMismatchIntrinsic = true;
1715 }
1716 } else if (UseVectorizedMismatchIntrinsic) {
1717 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1718 warning("vectorizedMismatch intrinsics are not available on this CPU");
1719 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1720 }
1721 if (UseAVX >= 2) {
1722 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1723 } else if (UseVectorizedHashCodeIntrinsic) {
1724 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1725 warning("vectorizedHashCode intrinsics are not available on this CPU");
1726 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1727 }
1728
1729 // Use count leading zeros count instruction if available.
1730 if (supports_lzcnt()) {
1731 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1732 UseCountLeadingZerosInstruction = true;
1733 }
1734 } else if (UseCountLeadingZerosInstruction) {
1735 warning("lzcnt instruction is not available on this CPU");
1736 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1737 }
1738
1739 // Use count trailing zeros instruction if available
1740 if (supports_bmi1()) {
1741 // tzcnt does not require VEX prefix
1742 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1743 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1744 // Don't use tzcnt if BMI1 is switched off on command line.
1745 UseCountTrailingZerosInstruction = false;
1746 } else {
1747 UseCountTrailingZerosInstruction = true;
1748 }
1749 }
1750 } else if (UseCountTrailingZerosInstruction) {
1751 warning("tzcnt instruction is not available on this CPU");
1752 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1753 }
1754
1755 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1756 // VEX prefix is generated only when AVX > 0.
1757 if (supports_bmi1() && supports_avx()) {
1758 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1759 UseBMI1Instructions = true;
1760 }
1761 } else if (UseBMI1Instructions) {
1762 warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1763 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1764 }
1765
1766 if (supports_bmi2() && supports_avx()) {
1767 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1768 UseBMI2Instructions = true;
1769 }
1770 } else if (UseBMI2Instructions) {
1771 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1772 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1773 }
1774
1775 // Use population count instruction if available.
1776 if (supports_popcnt()) {
1777 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1778 UsePopCountInstruction = true;
1779 }
1780 } else if (UsePopCountInstruction) {
1781 warning("POPCNT instruction is not available on this CPU");
1782 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1783 }
1784
1785 // Use fast-string operations if available.
1786 if (supports_erms()) {
1787 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1788 UseFastStosb = true;
1789 }
1790 } else if (UseFastStosb) {
1791 warning("fast-string operations are not available on this CPU");
1792 FLAG_SET_DEFAULT(UseFastStosb, false);
1793 }
1794
1795 // For AMD Processors use XMM/YMM MOVDQU instructions
1796 // for Object Initialization as default
1797 if (is_amd() && cpu_family() >= 0x19) {
1798 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1799 UseFastStosb = false;
1800 }
1801 }
1802
1803 #ifdef COMPILER2
1804 if (is_intel() && MaxVectorSize > 16) {
1805 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1806 UseFastStosb = false;
1807 }
1808 }
1809 #endif
1810
1811 // Use XMM/YMM MOVDQU instruction for Object Initialization
1812 if (!UseFastStosb && UseUnalignedLoadStores) {
1813 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1814 UseXMMForObjInit = true;
1815 }
1816 } else if (UseXMMForObjInit) {
1817 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1818 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1819 }
1820
1821 #ifdef COMPILER2
1822 if (FLAG_IS_DEFAULT(AlignVector)) {
1823 // Modern processors allow misaligned memory operations for vectors.
1824 AlignVector = !UseUnalignedLoadStores;
1825 }
1826 #endif // COMPILER2
1827
1828 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1829 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1830 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1831 } else if (!supports_sse() && supports_3dnow_prefetch()) {
1832 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1833 }
1834 }
1835
1836 // Allocation prefetch settings
1837 int cache_line_size = checked_cast<int>(prefetch_data_size());
1838 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1839 (cache_line_size > AllocatePrefetchStepSize)) {
1840 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1841 }
1842
1843 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1844 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1845 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1846 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1847 }
1848 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1849 }
1850
1851 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1852 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1853 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1854 }
1855
1856 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1857 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1858 supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1859 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1860 }
1861 #ifdef COMPILER2
1862 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1863 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1864 }
1865 #endif
1866 }
1867
1868 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1869 #ifdef COMPILER2
1870 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1871 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1872 }
1873 #endif
1874 }
1875
1876 // Prefetch settings
1877
1878 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1879 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1880 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1881 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1882
1883 // gc copy/scan is disabled if prefetchw isn't supported, because
1884 // Prefetch::write emits an inlined prefetchw on Linux.
1885 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1886 // The used prefetcht0 instruction works for both amd64 and em64t.
1887
1888 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1889 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1890 }
1891 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1892 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1893 }
1894
1895 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1896 (cache_line_size > ContendedPaddingWidth))
1897 ContendedPaddingWidth = cache_line_size;
1898
1899 // This machine allows unaligned memory accesses
1900 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1901 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1902 }
1903
1904 #ifndef PRODUCT
1905 if (log_is_enabled(Info, os, cpu)) {
1906 LogStream ls(Log(os, cpu)::info());
1907 outputStream* log = &ls;
1908 log->print_cr("Logical CPUs per core: %u",
1909 logical_processors_per_package());
1910 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1911 log->print("UseSSE=%d", UseSSE);
1912 if (UseAVX > 0) {
1913 log->print(" UseAVX=%d", UseAVX);
1914 }
1915 if (UseAES) {
1916 log->print(" UseAES=1");
1917 }
1918 #ifdef COMPILER2
1919 if (MaxVectorSize > 0) {
1920 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1921 }
1922 #endif
1923 log->cr();
1924 log->print("Allocation");
1925 if (AllocatePrefetchStyle <= 0) {
1926 log->print_cr(": no prefetching");
1927 } else {
1928 log->print(" prefetching: ");
1929 if (AllocatePrefetchInstr == 0) {
1930 log->print("PREFETCHNTA");
1931 } else if (AllocatePrefetchInstr == 1) {
1932 log->print("PREFETCHT0");
1933 } else if (AllocatePrefetchInstr == 2) {
1934 log->print("PREFETCHT2");
1935 } else if (AllocatePrefetchInstr == 3) {
1936 log->print("PREFETCHW");
1937 }
1938 if (AllocatePrefetchLines > 1) {
1939 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1940 } else {
1941 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1942 }
1943 }
1944
1945 if (PrefetchCopyIntervalInBytes > 0) {
1946 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1947 }
1948 if (PrefetchScanIntervalInBytes > 0) {
1949 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1950 }
1951 if (ContendedPaddingWidth > 0) {
1952 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1953 }
1954 }
1955 #endif // !PRODUCT
1956 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1957 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1958 }
1959 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1960 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1961 }
1962 }
1963
1964 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1965 VirtualizationType vrt = VM_Version::get_detected_virtualization();
1966 if (vrt == XenHVM) {
1967 st->print_cr("Xen hardware-assisted virtualization detected");
1968 } else if (vrt == KVM) {
1969 st->print_cr("KVM virtualization detected");
1970 } else if (vrt == VMWare) {
1971 st->print_cr("VMWare virtualization detected");
1972 VirtualizationSupport::print_virtualization_info(st);
1973 } else if (vrt == HyperV) {
1974 st->print_cr("Hyper-V virtualization detected");
1975 } else if (vrt == HyperVRole) {
1976 st->print_cr("Hyper-V role detected");
1977 }
1978 }
1979
// Decide, from the cached cpuid model/stepping, whether this CPU appears in
// Intel's published list of parts affected by the JCC (jump conditional code)
// erratum. Returns false for non-Intel-Core CPUs and for Intel models not in
// the table below. The model/stepping pairs are kept verbatim so they can be
// audited against the Intel document referenced below.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2047
2048 // On Xen, the cpuid instruction returns
2049 // eax / registers[0]: Version of Xen
2050 // ebx / registers[1]: chars 'XenV'
2051 // ecx / registers[2]: chars 'MMXe'
2052 // edx / registers[3]: chars 'nVMM'
2053 //
2054 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2055 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2056 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2057 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
2058 //
2059 // more information :
2060 // https://kb.vmware.com/s/article/1009458
2061 //
2062 void VM_Version::check_virtualizations() {
2063 uint32_t registers[4] = {0};
2064 char signature[13] = {0};
2065
2066 // Xen cpuid leaves can be found 0x100 aligned boundary starting
2067 // from 0x40000000 until 0x40010000.
2068 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2069 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2070 detect_virt_stub(leaf, registers);
2071 memcpy(signature, ®isters[1], 12);
2072
2073 if (strncmp("VMwareVMware", signature, 12) == 0) {
2074 Abstract_VM_Version::_detected_virtualization = VMWare;
2075 // check for extended metrics from guestlib
2076 VirtualizationSupport::initialize();
2077 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2078 Abstract_VM_Version::_detected_virtualization = HyperV;
2079 #ifdef _WINDOWS
2080 // CPUID leaf 0x40000007 is available to the root partition only.
2081 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2082 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2083 detect_virt_stub(0x40000007, registers);
2084 if ((registers[0] != 0x0) ||
2085 (registers[1] != 0x0) ||
2086 (registers[2] != 0x0) ||
2087 (registers[3] != 0x0)) {
2088 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2089 }
2090 #endif
2091 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2092 Abstract_VM_Version::_detected_virtualization = KVM;
2093 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2094 Abstract_VM_Version::_detected_virtualization = XenHVM;
2095 }
2096 }
2097 }
2098
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  // All four conditions are side-effect-free predicates; check the CPU
  // identity first, then whether the user left the vector flags untouched.
  if (!is_intel_cascade_lake()) {
    return false;
  }
  return FLAG_IS_DEFAULT(UseAVX) &&
         FLAG_IS_DEFAULT(MaxVectorSize) &&
         UseAVX > 2;
}
#endif
2108
2109 bool VM_Version::is_intel_cascade_lake() {
2110 return is_intel_skylake() && _stepping >= 5;
2111 }
2112
2113 bool VM_Version::is_intel_darkmont() {
2114 return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2115 }
2116
2117 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2118 // for implementing the array copy and clear operations.
2119 // The Intel platforms that supports the serialize instruction
2120 // has improved implementation of 64-byte load/stores and so the default
2121 // threshold is set to 0 for these platforms.
2122 int VM_Version::avx3_threshold() {
2123 return (is_intel_server_family() &&
2124 supports_serialize() &&
2125 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2126 }
2127
// Invoke the generated assembly stub (see initialize()) that clears the
// APX test state.
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2131
// Set to true once VM_Version::initialize() has completed.
static bool _vm_version_initialized = false;

// Generate the CPU-probing assembly stubs, run CPU feature detection, and
// detect any hypervisor. The stub generation order below matters: creating
// stub_blob must be the first use of the assembler.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate the four probing stubs used by this file: cpuid info capture,
  // hypervisor leaf probing, APX state clearing, and brand-string fetching.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                    g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                             g.clear_apx_test_state());
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                            g.generate_getCPUIDBrandString());
  // Populate _cpuid_info and derive all feature flags.
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2162
// x86 family ids as used by cpu_family()/extended_cpu_family().
typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Selected bits of the extended cpuid edx word (ext_cpuid1_edx);
// see cpu_is_em64t() for a user of INTEL64_FLAG.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// Bit masks for the std_cpuid1 edx word, one bit per feature; iterated in
// order by cpu_write_support_string() alongside _feature_edx_id[].
typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2208
// VM_Version statics
// Sizes of the family-name tables below.
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

// 12-character cpuid vendor string plus terminator.
const size_t VENDOR_LENGTH = 13;
// 48-byte extended brand string (3 cpuid leaves x 4 registers x 4 bytes)
// plus terminator.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Lazily-allocated cache for cpu_brand_string().
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

// Populated by resolve_cpu_information_details().
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2222
// Family-name strings for Intel parts, indexed by extended family id
// (used by cpu_family_description()); empty strings mean "no name".
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

// Family-name strings for AMD parts, indexed by extended family id.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Model-name strings for family 6, indexed by extended model id; the trailing
// nullptr is a sentinel relied upon by cpu_model_description().
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2344
/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
// Indexed by the legacy 8-bit brand id (low byte of std_cpuid1_ebx, see
// cpu_brand()); the trailing nullptr is the table's sentinel.
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2359
2360
// Feature names for std_cpuid1_edx, one entry per bit position (empty string
// for unnamed bits); iterated by cpu_write_support_string().
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

// Feature names for ext_cpuid1_edx, one entry per bit position.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};

// Feature names for std_cpuid1_ecx, one entry per bit position.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};

// Feature names for ext_cpuid1_ecx, one entry per bit position.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2500
2501 const char* VM_Version::cpu_model_description(void) {
2502 uint32_t cpu_family = extended_cpu_family();
2503 uint32_t cpu_model = extended_cpu_model();
2504 const char* model = nullptr;
2505
2506 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2507 for (uint32_t i = 0; i <= cpu_model; i++) {
2508 model = _model_id_pentium_pro[i];
2509 if (model == nullptr) {
2510 break;
2511 }
2512 }
2513 }
2514 return model;
2515 }
2516
2517 const char* VM_Version::cpu_brand_string(void) {
2518 if (_cpu_brand_string == nullptr) {
2519 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2520 if (nullptr == _cpu_brand_string) {
2521 return nullptr;
2522 }
2523 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2524 if (ret_val != OS_OK) {
2525 FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2526 _cpu_brand_string = nullptr;
2527 }
2528 }
2529 return _cpu_brand_string;
2530 }
2531
2532 const char* VM_Version::cpu_brand(void) {
2533 const char* brand = nullptr;
2534
2535 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2536 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2537 brand = _brand_id[0];
2538 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2539 brand = _brand_id[i];
2540 }
2541 }
2542 return brand;
2543 }
2544
2545 bool VM_Version::cpu_is_em64t(void) {
2546 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2547 }
2548
2549 bool VM_Version::is_netburst(void) {
2550 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2551 }
2552
2553 bool VM_Version::supports_tscinv_ext(void) {
2554 if (!supports_tscinv_bit()) {
2555 return false;
2556 }
2557
2558 if (is_intel()) {
2559 return true;
2560 }
2561
2562 if (is_amd()) {
2563 return !is_amd_Barcelona();
2564 }
2565
2566 if (is_hygon()) {
2567 return true;
2568 }
2569
2570 return false;
2571 }
2572
2573 void VM_Version::resolve_cpu_information_details(void) {
2574
2575 // in future we want to base this information on proper cpu
2576 // and cache topology enumeration such as:
2577 // Intel 64 Architecture Processor Topology Enumeration
2578 // which supports system cpu and cache topology enumeration
2579 // either using 2xAPICIDs or initial APICIDs
2580
2581 // currently only rough cpu information estimates
2582 // which will not necessarily reflect the exact configuration of the system
2583
2584 // this is the number of logical hardware threads
2585 // visible to the operating system
2586 _no_of_threads = os::processor_count();
2587
2588 // find out number of threads per cpu package
2589 int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2590 if (threads_per_package == 0) {
2591 // Fallback code to avoid div by zero in subsequent code.
2592 // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2593 threads_per_package = threads_per_core() * cores_per_cpu();
2594 }
2595
2596 // use amount of threads visible to the process in order to guess number of sockets
2597 _no_of_sockets = _no_of_threads / threads_per_package;
2598
2599 // process might only see a subset of the total number of threads
2600 // from a single processor package. Virtualization/resource management for example.
2601 // If so then just write a hard 1 as num of pkgs.
2602 if (0 == _no_of_sockets) {
2603 _no_of_sockets = 1;
2604 }
2605
2606 // estimate the number of cores
2607 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2608 }
2609
2610
2611 const char* VM_Version::cpu_family_description(void) {
2612 int cpu_family_id = extended_cpu_family();
2613 if (is_amd()) {
2614 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2615 return _family_id_amd[cpu_family_id];
2616 }
2617 }
2618 if (is_intel()) {
2619 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2620 return cpu_model_description();
2621 }
2622 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2623 return _family_id_intel[cpu_family_id];
2624 }
2625 }
2626 if (is_hygon()) {
2627 return "Dhyana";
2628 }
2629 return "Unknown x86";
2630 }
2631
2632 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2633 assert(buf != nullptr, "buffer is null!");
2634 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2635
2636 const char* cpu_type = nullptr;
2637 const char* x64 = nullptr;
2638
2639 if (is_intel()) {
2640 cpu_type = "Intel";
2641 x64 = cpu_is_em64t() ? " Intel64" : "";
2642 } else if (is_amd()) {
2643 cpu_type = "AMD";
2644 x64 = cpu_is_em64t() ? " AMD64" : "";
2645 } else if (is_hygon()) {
2646 cpu_type = "Hygon";
2647 x64 = cpu_is_em64t() ? " AMD64" : "";
2648 } else {
2649 cpu_type = "Unknown x86";
2650 x64 = cpu_is_em64t() ? " x86_64" : "";
2651 }
2652
2653 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2654 cpu_type,
2655 cpu_family_description(),
2656 supports_ht() ? " (HT)" : "",
2657 supports_sse3() ? " SSE3" : "",
2658 supports_ssse3() ? " SSSE3" : "",
2659 supports_sse4_1() ? " SSE4.1" : "",
2660 supports_sse4_2() ? " SSE4.2" : "",
2661 supports_sse4a() ? " SSE4A" : "",
2662 is_netburst() ? " Netburst" : "",
2663 is_intel_family_core() ? " Core" : "",
2664 x64);
2665
2666 return OS_OK;
2667 }
2668
2669 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2670 assert(buf != nullptr, "buffer is null!");
2671 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2672 assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2673
2674 // invoke newly generated asm code to fetch CPU Brand String
2675 getCPUIDBrandString_stub(&_cpuid_info);
2676
2677 // fetch results into buffer
2678 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0;
2679 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1;
2680 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2;
2681 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2682 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2683 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2684 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2685 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2686 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2687 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2688 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2689 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2690
2691 return OS_OK;
2692 }
2693
// Write a comma-separated list of supported feature names into buf, drawn
// from the four cpuid feature words and their parallel name tables above.
// Returns the number of characters written (buf_len - 1 on overflow/error).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t written = 0;
  const char* prefix = "";

// Append `string` to buf, preceded by ", " for every entry after the first;
// on jio_snprintf failure, bail out reporting a full buffer.
#define WRITE_TO_BUF(string)                                                          \
  {                                                                                   \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) {                                                                    \
      return buf_len - 1;                                                             \
    }                                                                                 \
    written += res;                                                                   \
    if (prefix[0] == '\0') {                                                          \
      prefix = ", ";                                                                  \
    }                                                                                 \
  }

  // std_cpuid1_edx features; flag walks bits 0..29 in step with the
  // _feature_edx_id table index fi.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // std_cpuid1_ecx features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // ext_cpuid1_ecx features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // ext_cpuid1_edx features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  if (supports_hybrid()) {
    WRITE_TO_BUF("Hybrid Architecture");
  }

  return written;
}
2754
2755 /**
2756 * Write a detailed description of the cpu to a given buffer, including
2757 * feature set.
2758 */
2759 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2760 assert(buf != nullptr, "buffer is null!");
2761 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2762
2763 static const char* unknown = "<unknown>";
2764 char vendor_id[VENDOR_LENGTH];
2765 const char* family = nullptr;
2766 const char* model = nullptr;
2767 const char* brand = nullptr;
2768 int outputLen = 0;
2769
2770 family = cpu_family_description();
2771 if (family == nullptr) {
2772 family = unknown;
2773 }
2774
2775 model = cpu_model_description();
2776 if (model == nullptr) {
2777 model = unknown;
2778 }
2779
2780 brand = cpu_brand_string();
2781
2782 if (brand == nullptr) {
2783 brand = cpu_brand();
2784 if (brand == nullptr) {
2785 brand = unknown;
2786 }
2787 }
2788
2789 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2790 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2791 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2792 vendor_id[VENDOR_LENGTH-1] = '\0';
2793
2794 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2795 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2796 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2797 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2798 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2799 "Supports: ",
2800 brand,
2801 vendor_id,
2802 family,
2803 extended_cpu_family(),
2804 model,
2805 extended_cpu_model(),
2806 cpu_stepping(),
2807 _cpuid_info.std_cpuid1_eax.bits.ext_family,
2808 _cpuid_info.std_cpuid1_eax.bits.ext_model,
2809 _cpuid_info.std_cpuid1_eax.bits.proc_type,
2810 _cpuid_info.std_cpuid1_eax.value,
2811 _cpuid_info.std_cpuid1_ebx.value,
2812 _cpuid_info.std_cpuid1_ecx.value,
2813 _cpuid_info.std_cpuid1_edx.value,
2814 _cpuid_info.ext_cpuid1_eax,
2815 _cpuid_info.ext_cpuid1_ebx,
2816 _cpuid_info.ext_cpuid1_ecx,
2817 _cpuid_info.ext_cpuid1_edx);
2818
2819 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2820 if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2821 return OS_ERR;
2822 }
2823
2824 cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2825
2826 return OS_OK;
2827 }
2828
2829
// Fill in Abstract_VM_Version statics
// Must run after VM_Version initialization (needs the cpuid data) and runs
// exactly once; the steps below are order-dependent.
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  // Resolve counts/topology details before formatting the description strings.
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  // Mark initialized only after both buffers are populated.
  _initialized = true;
}
2841
2842 /**
2843 * For information about extracting the frequency from the cpu brand string, please see:
2844 *
2845 * Intel Processor Identification and the CPUID Instruction
2846 * Application Note 485
2847 * May 2012
2848 *
2849 * The return value is the frequency in Hz.
2850 */
2851 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2852 const char* const brand_string = cpu_brand_string();
2853 if (brand_string == nullptr) {
2854 return 0;
2855 }
2856 const int64_t MEGA = 1000000;
2857 int64_t multiplier = 0;
2858 int64_t frequency = 0;
2859 uint8_t idx = 0;
2860 // The brand string buffer is at most 48 bytes.
2861 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2862 for (; idx < 48-2; ++idx) {
2863 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2864 // Search brand string for "yHz" where y is M, G, or T.
2865 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2866 if (brand_string[idx] == 'M') {
2867 multiplier = MEGA;
2868 } else if (brand_string[idx] == 'G') {
2869 multiplier = MEGA * 1000;
2870 } else if (brand_string[idx] == 'T') {
2871 multiplier = MEGA * MEGA;
2872 }
2873 break;
2874 }
2875 }
2876 if (multiplier > 0) {
2877 // Compute frequency (in Hz) from brand string.
2878 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2879 frequency = (brand_string[idx-4] - '0') * multiplier;
2880 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2881 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2882 } else { // format is "xxxx"
2883 frequency = (brand_string[idx-4] - '0') * 1000;
2884 frequency += (brand_string[idx-3] - '0') * 100;
2885 frequency += (brand_string[idx-2] - '0') * 10;
2886 frequency += (brand_string[idx-1] - '0');
2887 frequency *= multiplier;
2888 }
2889 }
2890 return frequency;
2891 }
2892
2893
// Returns the qualified CPU frequency (Hz) parsed from the brand string.
// Lazily computed and cached; 0 means "not yet computed", so an unparsable
// brand string simply causes a re-parse attempt on each call.
int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
  if (_max_qualified_cpu_frequency == 0) {
    _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
  }
  return _max_qualified_cpu_frequency;
}
2900
2901 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2902 VM_Features vm_features;
2903 if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2904 vm_features.set_feature(CPU_CX8);
2905 if (std_cpuid1_edx.bits.cmov != 0)
2906 vm_features.set_feature(CPU_CMOV);
2907 if (std_cpuid1_edx.bits.clflush != 0)
2908 vm_features.set_feature(CPU_FLUSH);
2909 // clflush should always be available on x86_64
2910 // if not we are in real trouble because we rely on it
2911 // to flush the code cache.
2912 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2913 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2914 ext_cpuid1_edx.bits.fxsr != 0))
2915 vm_features.set_feature(CPU_FXSR);
2916 // HT flag is set for multi-core processors also.
2917 if (threads_per_core() > 1)
2918 vm_features.set_feature(CPU_HT);
2919 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2920 ext_cpuid1_edx.bits.mmx != 0))
2921 vm_features.set_feature(CPU_MMX);
2922 if (std_cpuid1_edx.bits.sse != 0)
2923 vm_features.set_feature(CPU_SSE);
2924 if (std_cpuid1_edx.bits.sse2 != 0)
2925 vm_features.set_feature(CPU_SSE2);
2926 if (std_cpuid1_ecx.bits.sse3 != 0)
2927 vm_features.set_feature(CPU_SSE3);
2928 if (std_cpuid1_ecx.bits.ssse3 != 0)
2929 vm_features.set_feature(CPU_SSSE3);
2930 if (std_cpuid1_ecx.bits.sse4_1 != 0)
2931 vm_features.set_feature(CPU_SSE4_1);
2932 if (std_cpuid1_ecx.bits.sse4_2 != 0)
2933 vm_features.set_feature(CPU_SSE4_2);
2934 if (std_cpuid1_ecx.bits.popcnt != 0)
2935 vm_features.set_feature(CPU_POPCNT);
2936 if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2937 xem_xcr0_eax.bits.apx_f != 0 &&
2938 std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2939 vm_features.set_feature(CPU_APX_F);
2940 }
2941 if (std_cpuid1_ecx.bits.avx != 0 &&
2942 std_cpuid1_ecx.bits.osxsave != 0 &&
2943 xem_xcr0_eax.bits.sse != 0 &&
2944 xem_xcr0_eax.bits.ymm != 0) {
2945 vm_features.set_feature(CPU_AVX);
2946 vm_features.set_feature(CPU_VZEROUPPER);
2947 if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2948 vm_features.set_feature(CPU_SHA512);
2949 if (std_cpuid1_ecx.bits.f16c != 0)
2950 vm_features.set_feature(CPU_F16C);
2951 if (sef_cpuid7_ebx.bits.avx2 != 0) {
2952 vm_features.set_feature(CPU_AVX2);
2953 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2954 vm_features.set_feature(CPU_AVX_IFMA);
2955 }
2956 if (sef_cpuid7_ecx.bits.gfni != 0)
2957 vm_features.set_feature(CPU_GFNI);
2958 if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2959 xem_xcr0_eax.bits.opmask != 0 &&
2960 xem_xcr0_eax.bits.zmm512 != 0 &&
2961 xem_xcr0_eax.bits.zmm32 != 0) {
2962 vm_features.set_feature(CPU_AVX512F);
2963 if (sef_cpuid7_ebx.bits.avx512cd != 0)
2964 vm_features.set_feature(CPU_AVX512CD);
2965 if (sef_cpuid7_ebx.bits.avx512dq != 0)
2966 vm_features.set_feature(CPU_AVX512DQ);
2967 if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2968 vm_features.set_feature(CPU_AVX512_IFMA);
2969 if (sef_cpuid7_ebx.bits.avx512pf != 0)
2970 vm_features.set_feature(CPU_AVX512PF);
2971 if (sef_cpuid7_ebx.bits.avx512er != 0)
2972 vm_features.set_feature(CPU_AVX512ER);
2973 if (sef_cpuid7_ebx.bits.avx512bw != 0)
2974 vm_features.set_feature(CPU_AVX512BW);
2975 if (sef_cpuid7_ebx.bits.avx512vl != 0)
2976 vm_features.set_feature(CPU_AVX512VL);
2977 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2978 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2979 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2980 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2981 if (sef_cpuid7_ecx.bits.vaes != 0)
2982 vm_features.set_feature(CPU_AVX512_VAES);
2983 if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2984 vm_features.set_feature(CPU_AVX512_VNNI);
2985 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2986 vm_features.set_feature(CPU_AVX512_BITALG);
2987 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2988 vm_features.set_feature(CPU_AVX512_VBMI);
2989 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2990 vm_features.set_feature(CPU_AVX512_VBMI2);
2991 }
2992 if (is_intel()) {
2993 if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2994 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2995 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2996 xem_xcr0_eax.bits.opmask != 0 &&
2997 xem_xcr0_eax.bits.zmm512 != 0 &&
2998 xem_xcr0_eax.bits.zmm32 != 0) {
2999 vm_features.set_feature(CPU_AVX10_1);
3000 vm_features.set_feature(CPU_AVX512F);
3001 vm_features.set_feature(CPU_AVX512CD);
3002 vm_features.set_feature(CPU_AVX512DQ);
3003 vm_features.set_feature(CPU_AVX512PF);
3004 vm_features.set_feature(CPU_AVX512ER);
3005 vm_features.set_feature(CPU_AVX512BW);
3006 vm_features.set_feature(CPU_AVX512VL);
3007 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3008 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3009 vm_features.set_feature(CPU_AVX512_VAES);
3010 vm_features.set_feature(CPU_AVX512_VNNI);
3011 vm_features.set_feature(CPU_AVX512_BITALG);
3012 vm_features.set_feature(CPU_AVX512_VBMI);
3013 vm_features.set_feature(CPU_AVX512_VBMI2);
3014 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3015 vm_features.set_feature(CPU_AVX10_2);
3016 }
3017 }
3018 }
3019 }
3020
3021 if (std_cpuid1_ecx.bits.hv != 0)
3022 vm_features.set_feature(CPU_HV);
3023 if (sef_cpuid7_ebx.bits.bmi1 != 0)
3024 vm_features.set_feature(CPU_BMI1);
3025 if (std_cpuid1_edx.bits.tsc != 0)
3026 vm_features.set_feature(CPU_TSC);
3027 if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3028 vm_features.set_feature(CPU_TSCINV_BIT);
3029 if (std_cpuid1_ecx.bits.aes != 0)
3030 vm_features.set_feature(CPU_AES);
3031 if (ext_cpuid1_ecx.bits.lzcnt != 0)
3032 vm_features.set_feature(CPU_LZCNT);
3033 if (ext_cpuid1_ecx.bits.prefetchw != 0)
3034 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3035 if (sef_cpuid7_ebx.bits.erms != 0)
3036 vm_features.set_feature(CPU_ERMS);
3037 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3038 vm_features.set_feature(CPU_FSRM);
3039 if (std_cpuid1_ecx.bits.clmul != 0)
3040 vm_features.set_feature(CPU_CLMUL);
3041 if (sef_cpuid7_ebx.bits.rtm != 0)
3042 vm_features.set_feature(CPU_RTM);
3043 if (sef_cpuid7_ebx.bits.adx != 0)
3044 vm_features.set_feature(CPU_ADX);
3045 if (sef_cpuid7_ebx.bits.bmi2 != 0)
3046 vm_features.set_feature(CPU_BMI2);
3047 if (sef_cpuid7_ebx.bits.sha != 0)
3048 vm_features.set_feature(CPU_SHA);
3049 if (std_cpuid1_ecx.bits.fma != 0)
3050 vm_features.set_feature(CPU_FMA);
3051 if (sef_cpuid7_ebx.bits.clflushopt != 0)
3052 vm_features.set_feature(CPU_FLUSHOPT);
3053 if (sef_cpuid7_ebx.bits.clwb != 0)
3054 vm_features.set_feature(CPU_CLWB);
3055 if (ext_cpuid1_edx.bits.rdtscp != 0)
3056 vm_features.set_feature(CPU_RDTSCP);
3057 if (sef_cpuid7_ecx.bits.rdpid != 0)
3058 vm_features.set_feature(CPU_RDPID);
3059
3060 // AMD|Hygon additional features.
3061 if (is_amd_family()) {
3062 // PREFETCHW was checked above, check TDNOW here.
3063 if ((ext_cpuid1_edx.bits.tdnow != 0))
3064 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3065 if (ext_cpuid1_ecx.bits.sse4a != 0)
3066 vm_features.set_feature(CPU_SSE4A);
3067 }
3068
3069 // Intel additional features.
3070 if (is_intel()) {
3071 if (sef_cpuid7_edx.bits.serialize != 0)
3072 vm_features.set_feature(CPU_SERIALIZE);
3073 if (sef_cpuid7_edx.bits.hybrid != 0)
3074 vm_features.set_feature(CPU_HYBRID);
3075 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3076 vm_features.set_feature(CPU_AVX512_FP16);
3077 }
3078
3079 // ZX additional features.
3080 if (is_zx()) {
3081 // We do not know if these are supported by ZX, so we cannot trust
3082 // common CPUID bit for them.
3083 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3084 vm_features.clear_feature(CPU_CLWB);
3085 }
3086
3087 // Protection key features.
3088 if (sef_cpuid7_ecx.bits.pku != 0) {
3089 vm_features.set_feature(CPU_PKU);
3090 }
3091 if (sef_cpuid7_ecx.bits.ospke != 0) {
3092 vm_features.set_feature(CPU_OSPKE);
3093 }
3094
3095 // Control flow enforcement (CET) features.
3096 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3097 vm_features.set_feature(CPU_CET_SS);
3098 }
3099 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3100 vm_features.set_feature(CPU_CET_IBT);
3101 }
3102
3103 // Composite features.
3104 if (supports_tscinv_bit() &&
3105 ((is_amd_family() && !is_amd_Barcelona()) ||
3106 is_intel_tsc_synched_at_init())) {
3107 vm_features.set_feature(CPU_TSCINV);
3108 }
3109 return vm_features;
3110 }
3111
3112 bool VM_Version::os_supports_avx_vectors() {
3113 bool retVal = false;
3114 int nreg = 4;
3115 if (supports_evex()) {
3116 // Verify that OS save/restore all bits of EVEX registers
3117 // during signal processing.
3118 retVal = true;
3119 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3120 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3121 retVal = false;
3122 break;
3123 }
3124 }
3125 } else if (supports_avx()) {
3126 // Verify that OS save/restore all bits of AVX registers
3127 // during signal processing.
3128 retVal = true;
3129 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3130 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3131 retVal = false;
3132 break;
3133 }
3134 }
3135 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3136 if (retVal == false) {
3137 // Verify that OS save/restore all bits of EVEX registers
3138 // during signal processing.
3139 retVal = true;
3140 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3141 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3142 retVal = false;
3143 break;
3144 }
3145 }
3146 }
3147 }
3148 return retVal;
3149 }
3150
3151 bool VM_Version::os_supports_apx_egprs() {
3152 if (!supports_apx_f()) {
3153 return false;
3154 }
3155 if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3156 _cpuid_info.apx_save[1] != egpr_test_value()) {
3157 return false;
3158 }
3159 return true;
3160 }
3161
3162 uint VM_Version::cores_per_cpu() {
3163 uint result = 1;
3164 if (is_intel()) {
3165 bool supports_topology = supports_processor_topology();
3166 if (supports_topology) {
3167 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3168 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3169 }
3170 if (!supports_topology || result == 0) {
3171 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3172 }
3173 } else if (is_amd_family()) {
3174 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3175 if (cpu_family() >= 0x17) { // Zen or later
3176 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3177 }
3178 } else if (is_zx()) {
3179 bool supports_topology = supports_processor_topology();
3180 if (supports_topology) {
3181 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3182 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3183 }
3184 if (!supports_topology || result == 0) {
3185 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3186 }
3187 }
3188 return result;
3189 }
3190
3191 uint VM_Version::threads_per_core() {
3192 uint result = 1;
3193 if (is_intel() && supports_processor_topology()) {
3194 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3195 } else if (is_zx() && supports_processor_topology()) {
3196 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3197 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3198 if (cpu_family() >= 0x17) {
3199 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3200 } else {
3201 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3202 cores_per_cpu();
3203 }
3204 }
3205 return (result == 0 ? 1 : result);
3206 }
3207
3208 uint VM_Version::L1_line_size() {
3209 uint result = 0;
3210 if (is_intel()) {
3211 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3212 } else if (is_amd_family()) {
3213 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3214 } else if (is_zx()) {
3215 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3216 }
3217 if (result < 32) // not defined ?
3218 result = 32; // 32 bytes by default on x86 and other x64
3219 return result;
3220 }
3221
3222 bool VM_Version::is_intel_tsc_synched_at_init() {
3223 if (is_intel_family_core()) {
3224 uint32_t ext_model = extended_cpu_model();
3225 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3226 ext_model == CPU_MODEL_WESTMERE_EP ||
3227 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3228 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3229 // <= 2-socket invariant tsc support. EX versions are usually used
3230 // in > 2-socket systems and likely don't synchronize tscs at
3231 // initialization.
3232 // Code that uses tsc values must be prepared for them to arbitrarily
3233 // jump forward or backward.
3234 return true;
3235 }
3236 }
3237 return false;
3238 }
3239
3240 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3241 // Hardware prefetching (distance/size in bytes):
3242 // Pentium 3 - 64 / 32
3243 // Pentium 4 - 256 / 128
3244 // Athlon - 64 / 32 ????
3245 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3246 // Core - 128 / 64
3247 //
3248 // Software prefetching (distance in bytes / instruction with best score):
3249 // Pentium 3 - 128 / prefetchnta
3250 // Pentium 4 - 512 / prefetchnta
3251 // Athlon - 128 / prefetchnta
3252 // Opteron - 256 / prefetchnta
3253 // Core - 256 / prefetchnta
3254 // It will be used only when AllocatePrefetchStyle > 0
3255
3256 if (is_amd_family()) { // AMD | Hygon
3257 if (supports_sse2()) {
3258 return 256; // Opteron
3259 } else {
3260 return 128; // Athlon
3261 }
3262 } else { // Intel
3263 if (supports_sse3() && is_intel_server_family()) {
3264 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3265 return 192;
3266 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3267 return 384;
3268 }
3269 }
3270 if (supports_sse2()) {
3271 if (is_intel_server_family()) {
3272 return 256; // Pentium M, Core, Core2
3273 } else {
3274 return 512; // Pentium 4
3275 }
3276 } else {
3277 return 128; // Pentium 3 (and all other old CPUs)
3278 }
3279 }
3280 }
3281
3282 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3283 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3284 switch (id) {
3285 case vmIntrinsics::_floatToFloat16:
3286 case vmIntrinsics::_float16ToFloat:
3287 if (!supports_float16()) {
3288 return false;
3289 }
3290 break;
3291 default:
3292 break;
3293 }
3294 return true;
3295 }
3296
3297 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3298 int i = 0;
3299 ss.join([&]() {
3300 const char* str = nullptr;
3301 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3302 if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3303 str = _features_names[i];
3304 }
3305 i += 1;
3306 }
3307 return str;
3308 }, ", ");
3309 }
3310
3311 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3312 VM_Features* features = (VM_Features*)features_buffer;
3313 insert_features_names(*features, ss);
3314 }
3315
3316 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3317 VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3318 VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3319 int i = 0;
3320 ss.join([&]() {
3321 const char* str = nullptr;
3322 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3323 Feature_Flag flag = (Feature_Flag)i;
3324 if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3325 str = _features_names[i];
3326 }
3327 i += 1;
3328 }
3329 return str;
3330 }, ", ");
3331 }
3332
3333 int VM_Version::cpu_features_size() {
3334 return sizeof(VM_Features);
3335 }
3336
3337 void VM_Version::store_cpu_features(void* buf) {
3338 VM_Features copy = _features;
3339 copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3340 memcpy(buf, ©, sizeof(VM_Features));
3341 }
3342
3343 bool VM_Version::supports_features(void* features_buffer) {
3344 VM_Features* features_to_test = (VM_Features*)features_buffer;
3345 return _features.supports_features(features_to_test);
3346 }