1 /*
2 * Copyright (c) 1997, 2026, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/codeBlob.hpp"
29 #include "compiler/compilerDefinitions.inline.hpp"
30 #include "jvm.h"
31 #include "logging/log.hpp"
32 #include "logging/logStream.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "runtime/globals_extension.hpp"
36 #include "runtime/icache.hpp"
37 #include "runtime/java.hpp"
38 #include "runtime/os.inline.hpp"
39 #include "runtime/stubCodeGenerator.hpp"
40 #include "runtime/vm_version.hpp"
41 #include "utilities/checkedCast.hpp"
42 #include "utilities/ostream.hpp"
43 #include "utilities/powerOfTwo.hpp"
44 #include "utilities/virtualizationSupport.hpp"
45
46 int VM_Version::_cpu;
47 int VM_Version::_model;
48 int VM_Version::_stepping;
49 bool VM_Version::_has_intel_jcc_erratum;
50 VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };
51
52 #define DECLARE_CPU_FEATURE_NAME(id, name) XSTR(name),
53 const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
54 #undef DECLARE_CPU_FEATURE_NAME
55
56 // Address of instruction which causes SEGV
57 address VM_Version::_cpuinfo_segv_addr = nullptr;
58 // Address of instruction after the one which causes SEGV
59 address VM_Version::_cpuinfo_cont_addr = nullptr;
60 // Address of instruction which causes APX specific SEGV
61 address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
62 // Address of instruction after the one which causes APX specific SEGV
63 address VM_Version::_cpuinfo_cont_addr_apx = nullptr;
64
65 static BufferBlob* stub_blob;
66 static const int stub_size = 2550;
67
68 int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;
69
70 VM_Version::VM_Features VM_Version::_features;
71 VM_Version::VM_Features VM_Version::_cpu_features;
72
73 extern "C" {
74 typedef void (*get_cpu_info_stub_t)(void*);
75 typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
76 typedef void (*clear_apx_test_state_t)(void);
77 typedef void (*getCPUIDBrandString_stub_t)(void*);
78 }
79 static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
80 static detect_virt_stub_t detect_virt_stub = nullptr;
81 static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
82 static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
83
84 #define CPUID_STANDARD_FN 0x0
85 #define CPUID_STANDARD_FN_1 0x1
86 #define CPUID_STANDARD_FN_4 0x4
87 #define CPUID_STANDARD_FN_B 0xb
88
89 #define CPUID_EXTENDED_FN 0x80000000
90 #define CPUID_EXTENDED_FN_1 0x80000001
91 #define CPUID_EXTENDED_FN_2 0x80000002
92 #define CPUID_EXTENDED_FN_3 0x80000003
93 #define CPUID_EXTENDED_FN_4 0x80000004
94 #define CPUID_EXTENDED_FN_7 0x80000007
95 #define CPUID_EXTENDED_FN_8 0x80000008
96
97 class VM_Version_StubGenerator: public StubCodeGenerator {
98 public:
99
100 VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}
101
102 address clear_apx_test_state() {
103 # define __ _masm->
104 address start = __ pc();
105 // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
106 // handling guarantees that preserved register values post signal handling were
107 // re-instantiated by operating system and not because they were not modified externally.
108
109 bool save_apx = UseAPX;
110 VM_Version::set_apx_cpuFeatures();
111 UseAPX = true;
112 // EGPR state save/restoration.
113 __ mov64(r16, 0L);
114 __ mov64(r31, 0L);
115 UseAPX = save_apx;
116 VM_Version::clean_cpuFeatures();
117 __ ret(0);
118 return start;
119 }
120
121 address generate_get_cpu_info() {
122 // Flags to test CPU type.
123 const uint32_t HS_EFL_AC = 0x40000;
124 const uint32_t HS_EFL_ID = 0x200000;
125 // Values for when we don't have a CPUID instruction.
126 const int CPU_FAMILY_SHIFT = 8;
127 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
128 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
129 bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);
130
131 Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
132 Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
133 Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning, apx_xstate;
134 Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;
135
136 StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
137 # define __ _masm->
138
139 address start = __ pc();
140
141 //
142 // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
143 //
144 // rcx and rdx are first and second argument registers on windows
145
146 __ push(rbp);
147 __ mov(rbp, c_rarg0); // cpuid_info address
148 __ push(rbx);
149 __ push(rsi);
150 __ pushf(); // preserve rbx, and flags
151 __ pop(rax);
152 __ push(rax);
153 __ mov(rcx, rax);
154 //
155 // if we are unable to change the AC flag, we have a 386
156 //
157 __ xorl(rax, HS_EFL_AC);
158 __ push(rax);
159 __ popf();
160 __ pushf();
161 __ pop(rax);
162 __ cmpptr(rax, rcx);
163 __ jccb(Assembler::notEqual, detect_486);
164
165 __ movl(rax, CPU_FAMILY_386);
166 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
167 __ jmp(done);
168
169 //
170 // If we are unable to change the ID flag, we have a 486 which does
171 // not support the "cpuid" instruction.
172 //
173 __ bind(detect_486);
174 __ mov(rax, rcx);
175 __ xorl(rax, HS_EFL_ID);
176 __ push(rax);
177 __ popf();
178 __ pushf();
179 __ pop(rax);
180 __ cmpptr(rcx, rax);
181 __ jccb(Assembler::notEqual, detect_586);
182
183 __ bind(cpu486);
184 __ movl(rax, CPU_FAMILY_486);
185 __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
186 __ jmp(done);
187
188 //
189 // At this point, we have a chip which supports the "cpuid" instruction
190 //
191 __ bind(detect_586);
192 __ xorl(rax, rax);
193 __ cpuid();
194 __ orl(rax, rax);
195 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
196 // value of at least 1, we give up and
197 // assume a 486
198 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
199 __ movl(Address(rsi, 0), rax);
200 __ movl(Address(rsi, 4), rbx);
201 __ movl(Address(rsi, 8), rcx);
202 __ movl(Address(rsi,12), rdx);
203
204 __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
205 __ jccb(Assembler::belowEqual, std_cpuid4);
206
207 //
208 // cpuid(0xB) Processor Topology
209 //
210 __ movl(rax, 0xb);
211 __ xorl(rcx, rcx); // Threads level
212 __ cpuid();
213
214 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
215 __ movl(Address(rsi, 0), rax);
216 __ movl(Address(rsi, 4), rbx);
217 __ movl(Address(rsi, 8), rcx);
218 __ movl(Address(rsi,12), rdx);
219
220 __ movl(rax, 0xb);
221 __ movl(rcx, 1); // Cores level
222 __ cpuid();
223 __ push(rax);
224 __ andl(rax, 0x1f); // Determine if valid topology level
225 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
226 __ andl(rax, 0xffff);
227 __ pop(rax);
228 __ jccb(Assembler::equal, std_cpuid4);
229
230 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
231 __ movl(Address(rsi, 0), rax);
232 __ movl(Address(rsi, 4), rbx);
233 __ movl(Address(rsi, 8), rcx);
234 __ movl(Address(rsi,12), rdx);
235
236 __ movl(rax, 0xb);
237 __ movl(rcx, 2); // Packages level
238 __ cpuid();
239 __ push(rax);
240 __ andl(rax, 0x1f); // Determine if valid topology level
241 __ orl(rax, rbx); // eax[4:0] | ebx[0:15] == 0 indicates invalid level
242 __ andl(rax, 0xffff);
243 __ pop(rax);
244 __ jccb(Assembler::equal, std_cpuid4);
245
246 __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
247 __ movl(Address(rsi, 0), rax);
248 __ movl(Address(rsi, 4), rbx);
249 __ movl(Address(rsi, 8), rcx);
250 __ movl(Address(rsi,12), rdx);
251
252 //
253 // cpuid(0x4) Deterministic cache params
254 //
255 __ bind(std_cpuid4);
256 __ movl(rax, 4);
257 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
258 __ jccb(Assembler::greater, std_cpuid1);
259
260 __ xorl(rcx, rcx); // L1 cache
261 __ cpuid();
262 __ push(rax);
263 __ andl(rax, 0x1f); // Determine if valid cache parameters used
264 __ orl(rax, rax); // eax[4:0] == 0 indicates invalid cache
265 __ pop(rax);
266 __ jccb(Assembler::equal, std_cpuid1);
267
268 __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
269 __ movl(Address(rsi, 0), rax);
270 __ movl(Address(rsi, 4), rbx);
271 __ movl(Address(rsi, 8), rcx);
272 __ movl(Address(rsi,12), rdx);
273
274 //
275 // Standard cpuid(0x1)
276 //
277 __ bind(std_cpuid1);
278 __ movl(rax, 1);
279 __ cpuid();
280 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
281 __ movl(Address(rsi, 0), rax);
282 __ movl(Address(rsi, 4), rbx);
283 __ movl(Address(rsi, 8), rcx);
284 __ movl(Address(rsi,12), rdx);
285
286 //
287 // Check if OS has enabled XGETBV instruction to access XCR0
288 // (OSXSAVE feature flag) and CPU supports AVX
289 //
290 __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
291 __ cmpl(rcx, 0x18000000);
292 __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported
293
294 //
295 // XCR0, XFEATURE_ENABLED_MASK register
296 //
297 __ xorl(rcx, rcx); // zero for XCR0 register
298 __ xgetbv();
299 __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
300 __ movl(Address(rsi, 0), rax);
301 __ movl(Address(rsi, 4), rdx);
302
303 //
304 // cpuid(0x7) Structured Extended Features Enumeration Leaf.
305 //
306 __ bind(sef_cpuid);
307 __ movl(rax, 7);
308 __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
309 __ jccb(Assembler::greater, ext_cpuid);
310 // ECX = 0
311 __ xorl(rcx, rcx);
312 __ cpuid();
313 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
314 __ movl(Address(rsi, 0), rax);
315 __ movl(Address(rsi, 4), rbx);
316 __ movl(Address(rsi, 8), rcx);
317 __ movl(Address(rsi, 12), rdx);
318
319 //
320 // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
321 //
322 __ bind(sefsl1_cpuid);
323 __ movl(rax, 7);
324 __ movl(rcx, 1);
325 __ cpuid();
326 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
327 __ movl(Address(rsi, 0), rax);
328 __ movl(Address(rsi, 4), rdx);
329
330 //
331 // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
332 //
333 __ bind(std_cpuid29);
334 __ movl(rax, 0x29);
335 __ movl(rcx, 0);
336 __ cpuid();
337 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
338 __ movl(Address(rsi, 0), rbx);
339
340 //
341 // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
342 //
343 __ bind(std_cpuid24);
344 __ movl(rax, 0x24);
345 __ movl(rcx, 0);
346 __ cpuid();
347 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
348 __ movl(Address(rsi, 0), rax);
349 __ movl(Address(rsi, 4), rbx);
350
351 //
352 // Extended cpuid(0x80000000)
353 //
354 __ bind(ext_cpuid);
355 __ movl(rax, 0x80000000);
356 __ cpuid();
357 __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
358 __ jcc(Assembler::belowEqual, done);
359 __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
360 __ jcc(Assembler::belowEqual, ext_cpuid1);
361 __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
362 __ jccb(Assembler::belowEqual, ext_cpuid5);
363 __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
364 __ jccb(Assembler::belowEqual, ext_cpuid7);
365 __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
366 __ jccb(Assembler::belowEqual, ext_cpuid8);
367 __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
368 __ jccb(Assembler::below, ext_cpuid8);
369 //
370 // Extended cpuid(0x8000001E)
371 //
372 __ movl(rax, 0x8000001E);
373 __ cpuid();
374 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
375 __ movl(Address(rsi, 0), rax);
376 __ movl(Address(rsi, 4), rbx);
377 __ movl(Address(rsi, 8), rcx);
378 __ movl(Address(rsi,12), rdx);
379
380 //
381 // Extended cpuid(0x80000008)
382 //
383 __ bind(ext_cpuid8);
384 __ movl(rax, 0x80000008);
385 __ cpuid();
386 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
387 __ movl(Address(rsi, 0), rax);
388 __ movl(Address(rsi, 4), rbx);
389 __ movl(Address(rsi, 8), rcx);
390 __ movl(Address(rsi,12), rdx);
391
392 //
393 // Extended cpuid(0x80000007)
394 //
395 __ bind(ext_cpuid7);
396 __ movl(rax, 0x80000007);
397 __ cpuid();
398 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
399 __ movl(Address(rsi, 0), rax);
400 __ movl(Address(rsi, 4), rbx);
401 __ movl(Address(rsi, 8), rcx);
402 __ movl(Address(rsi,12), rdx);
403
404 //
405 // Extended cpuid(0x80000005)
406 //
407 __ bind(ext_cpuid5);
408 __ movl(rax, 0x80000005);
409 __ cpuid();
410 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
411 __ movl(Address(rsi, 0), rax);
412 __ movl(Address(rsi, 4), rbx);
413 __ movl(Address(rsi, 8), rcx);
414 __ movl(Address(rsi,12), rdx);
415
416 //
417 // Extended cpuid(0x80000001)
418 //
419 __ bind(ext_cpuid1);
420 __ movl(rax, 0x80000001);
421 __ cpuid();
422 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
423 __ movl(Address(rsi, 0), rax);
424 __ movl(Address(rsi, 4), rbx);
425 __ movl(Address(rsi, 8), rcx);
426 __ movl(Address(rsi,12), rdx);
427
428 //
429 // Check if OS has enabled XGETBV instruction to access XCR0
430 // (OSXSAVE feature flag) and CPU supports APX
431 //
432 // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
433 // and XCRO[19] bit for OS support to save/restore extended GPR state.
434 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
435 __ movl(rax, 0x200000);
436 __ andl(rax, Address(rsi, 4));
437 __ jcc(Assembler::equal, vector_save_restore);
438 // check _cpuid_info.xem_xcr0_eax.bits.apx_f
439 __ movl(rax, 0x80000);
440 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
441 __ jcc(Assembler::equal, vector_save_restore);
442
443 bool save_apx = UseAPX;
444 VM_Version::set_apx_cpuFeatures();
445 UseAPX = true;
446 __ mov64(r16, VM_Version::egpr_test_value());
447 __ mov64(r31, VM_Version::egpr_test_value());
448 __ xorl(rsi, rsi);
449 VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
450 // Generate SEGV
451 __ movl(rax, Address(rsi, 0));
452
453 VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
454 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
455 __ movq(Address(rsi, 0), r16);
456 __ movq(Address(rsi, 8), r31);
457
458 //
459 // Query CPUID 0xD.19 for APX XSAVE offset
460 // Extended State Enumeration Sub-leaf 19 (APX)
461 // EAX = size of APX state (should be 128)
462 // EBX = offset in standard XSAVE format
463 //
464 __ movl(rax, 0xD);
465 __ movl(rcx, 19);
466 __ cpuid();
467 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_size_offset())));
468 __ movl(Address(rsi, 0), rax);
469 __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_xstate_offset_offset())));
470 __ movl(Address(rsi, 0), rbx);
471
472 UseAPX = save_apx;
473 __ bind(vector_save_restore);
474 //
475 // Check if OS has enabled XGETBV instruction to access XCR0
476 // (OSXSAVE feature flag) and CPU supports AVX
477 //
478 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
479 __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
480 __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
481 __ cmpl(rcx, 0x18000000);
482 __ jccb(Assembler::notEqual, done); // jump if AVX is not supported
483
484 __ movl(rax, 0x6);
485 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
486 __ cmpl(rax, 0x6);
487 __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported
488
489 // we need to bridge farther than imm8, so we use this island as a thunk
490 __ bind(done);
491 __ jmp(wrapup);
492
493 __ bind(start_simd_check);
494 //
495 // Some OSs have a bug when upper 128/256bits of YMM/ZMM
496 // registers are not restored after a signal processing.
497 // Generate SEGV here (reference through null)
498 // and check upper YMM/ZMM bits after it.
499 //
500 int saved_useavx = UseAVX;
501
502 // If UseAVX is uninitialized or is set by the user to include EVEX
503 if (use_evex) {
504 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
505 // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
506 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
507 __ movl(rax, 0x10000);
508 __ andl(rax, Address(rsi, 4));
509 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
510 __ movl(rbx, 0x80000);
511 __ andl(rbx, Address(rsi, 4));
512 __ orl(rax, rbx);
513 __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
514 // check _cpuid_info.xem_xcr0_eax.bits.opmask
515 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
516 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
517 __ movl(rax, 0xE0);
518 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
519 __ cmpl(rax, 0xE0);
520 __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported
521
522 if (FLAG_IS_DEFAULT(UseAVX)) {
523 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
524 __ movl(rax, Address(rsi, 0));
525 __ cmpl(rax, 0x50654); // If it is Skylake
526 __ jcc(Assembler::equal, legacy_setup);
527 }
528 // EVEX setup: run in lowest evex mode
529 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
530 UseAVX = 3;
531 #ifdef _WINDOWS
532 // xmm5-xmm15 are not preserved by caller on windows
533 // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
534 __ subptr(rsp, 64);
535 __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
536 __ subptr(rsp, 64);
537 __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
538 __ subptr(rsp, 64);
539 __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
540 #endif // _WINDOWS
541
542 // load value into all 64 bytes of zmm7 register
543 __ movl(rcx, VM_Version::ymm_test_value());
544 __ movdl(xmm0, rcx);
545 __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
546 __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
547 __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
548 __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
549 VM_Version::clean_cpuFeatures();
550 __ jmp(save_restore_except);
551 }
552
553 __ bind(legacy_setup);
554 // AVX setup
555 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
556 UseAVX = 1;
557 #ifdef _WINDOWS
558 __ subptr(rsp, 32);
559 __ vmovdqu(Address(rsp, 0), xmm7);
560 __ subptr(rsp, 32);
561 __ vmovdqu(Address(rsp, 0), xmm8);
562 __ subptr(rsp, 32);
563 __ vmovdqu(Address(rsp, 0), xmm15);
564 #endif // _WINDOWS
565
566 // load value into all 32 bytes of ymm7 register
567 __ movl(rcx, VM_Version::ymm_test_value());
568
569 __ movdl(xmm0, rcx);
570 __ pshufd(xmm0, xmm0, 0x00);
571 __ vinsertf128_high(xmm0, xmm0);
572 __ vmovdqu(xmm7, xmm0);
573 __ vmovdqu(xmm8, xmm0);
574 __ vmovdqu(xmm15, xmm0);
575 VM_Version::clean_cpuFeatures();
576
577 __ bind(save_restore_except);
578 __ xorl(rsi, rsi);
579 VM_Version::set_cpuinfo_segv_addr(__ pc());
580 // Generate SEGV
581 __ movl(rax, Address(rsi, 0));
582
583 VM_Version::set_cpuinfo_cont_addr(__ pc());
584 // Returns here after signal. Save xmm0 to check it later.
585
586 // If UseAVX is uninitialized or is set by the user to include EVEX
587 if (use_evex) {
588 // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
589 __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
590 __ movl(rax, 0x10000);
591 __ andl(rax, Address(rsi, 4));
592 __ jcc(Assembler::equal, legacy_save_restore);
593 // check _cpuid_info.xem_xcr0_eax.bits.opmask
594 // check _cpuid_info.xem_xcr0_eax.bits.zmm512
595 // check _cpuid_info.xem_xcr0_eax.bits.zmm32
596 __ movl(rax, 0xE0);
597 __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
598 __ cmpl(rax, 0xE0);
599 __ jcc(Assembler::notEqual, legacy_save_restore);
600
601 if (FLAG_IS_DEFAULT(UseAVX)) {
602 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
603 __ movl(rax, Address(rsi, 0));
604 __ cmpl(rax, 0x50654); // If it is Skylake
605 __ jcc(Assembler::equal, legacy_save_restore);
606 }
607 // EVEX check: run in lowest evex mode
608 VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
609 UseAVX = 3;
610 __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
611 __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
612 __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
613 __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
614 __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);
615
616 #ifdef _WINDOWS
617 __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
618 __ addptr(rsp, 64);
619 __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
620 __ addptr(rsp, 64);
621 __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
622 __ addptr(rsp, 64);
623 #endif // _WINDOWS
624 generate_vzeroupper(wrapup);
625 VM_Version::clean_cpuFeatures();
626 UseAVX = saved_useavx;
627 __ jmp(wrapup);
628 }
629
630 __ bind(legacy_save_restore);
631 // AVX check
632 VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
633 UseAVX = 1;
634 __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
635 __ vmovdqu(Address(rsi, 0), xmm0);
636 __ vmovdqu(Address(rsi, 32), xmm7);
637 __ vmovdqu(Address(rsi, 64), xmm8);
638 __ vmovdqu(Address(rsi, 96), xmm15);
639
640 #ifdef _WINDOWS
641 __ vmovdqu(xmm15, Address(rsp, 0));
642 __ addptr(rsp, 32);
643 __ vmovdqu(xmm8, Address(rsp, 0));
644 __ addptr(rsp, 32);
645 __ vmovdqu(xmm7, Address(rsp, 0));
646 __ addptr(rsp, 32);
647 #endif // _WINDOWS
648
649 generate_vzeroupper(wrapup);
650 VM_Version::clean_cpuFeatures();
651 UseAVX = saved_useavx;
652
653 __ bind(wrapup);
654 __ popf();
655 __ pop(rsi);
656 __ pop(rbx);
657 __ pop(rbp);
658 __ ret(0);
659
660 # undef __
661
662 return start;
663 };
664 void generate_vzeroupper(Label& L_wrapup) {
665 # define __ _masm->
666 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
667 __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
668 __ jcc(Assembler::notEqual, L_wrapup);
669 __ movl(rcx, 0x0FFF0FF0);
670 __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
671 __ andl(rcx, Address(rsi, 0));
672 __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
673 __ jcc(Assembler::equal, L_wrapup);
674 __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
675 __ jcc(Assembler::equal, L_wrapup);
676 // vzeroupper() will use a pre-computed instruction sequence that we
677 // can't compute until after we've determined CPU capabilities. Use
678 // uncached variant here directly to be able to bootstrap correctly
679 __ vzeroupper_uncached();
680 # undef __
681 }
682 address generate_detect_virt() {
683 StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
684 # define __ _masm->
685
686 address start = __ pc();
687
688 // Evacuate callee-saved registers
689 __ push(rbp);
690 __ push(rbx);
691 __ push(rsi); // for Windows
692
693 __ mov(rax, c_rarg0); // CPUID leaf
694 __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)
695
696 __ cpuid();
697
698 // Store result to register array
699 __ movl(Address(rsi, 0), rax);
700 __ movl(Address(rsi, 4), rbx);
701 __ movl(Address(rsi, 8), rcx);
702 __ movl(Address(rsi, 12), rdx);
703
704 // Epilogue
705 __ pop(rsi);
706 __ pop(rbx);
707 __ pop(rbp);
708 __ ret(0);
709
710 # undef __
711
712 return start;
713 };
714
715
716 address generate_getCPUIDBrandString(void) {
717 // Flags to test CPU type.
718 const uint32_t HS_EFL_AC = 0x40000;
719 const uint32_t HS_EFL_ID = 0x200000;
720 // Values for when we don't have a CPUID instruction.
721 const int CPU_FAMILY_SHIFT = 8;
722 const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
723 const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
724
725 Label detect_486, cpu486, detect_586, done, ext_cpuid;
726
727 StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
728 # define __ _masm->
729
730 address start = __ pc();
731
732 //
733 // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
734 //
735 // rcx and rdx are first and second argument registers on windows
736
737 __ push(rbp);
738 __ mov(rbp, c_rarg0); // cpuid_info address
739 __ push(rbx);
740 __ push(rsi);
741 __ pushf(); // preserve rbx, and flags
742 __ pop(rax);
743 __ push(rax);
744 __ mov(rcx, rax);
745 //
746 // if we are unable to change the AC flag, we have a 386
747 //
748 __ xorl(rax, HS_EFL_AC);
749 __ push(rax);
750 __ popf();
751 __ pushf();
752 __ pop(rax);
753 __ cmpptr(rax, rcx);
754 __ jccb(Assembler::notEqual, detect_486);
755
756 __ movl(rax, CPU_FAMILY_386);
757 __ jmp(done);
758
759 //
760 // If we are unable to change the ID flag, we have a 486 which does
761 // not support the "cpuid" instruction.
762 //
763 __ bind(detect_486);
764 __ mov(rax, rcx);
765 __ xorl(rax, HS_EFL_ID);
766 __ push(rax);
767 __ popf();
768 __ pushf();
769 __ pop(rax);
770 __ cmpptr(rcx, rax);
771 __ jccb(Assembler::notEqual, detect_586);
772
773 __ bind(cpu486);
774 __ movl(rax, CPU_FAMILY_486);
775 __ jmp(done);
776
777 //
778 // At this point, we have a chip which supports the "cpuid" instruction
779 //
780 __ bind(detect_586);
781 __ xorl(rax, rax);
782 __ cpuid();
783 __ orl(rax, rax);
784 __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
785 // value of at least 1, we give up and
786 // assume a 486
787
788 //
789 // Extended cpuid(0x80000000) for processor brand string detection
790 //
791 __ bind(ext_cpuid);
792 __ movl(rax, CPUID_EXTENDED_FN);
793 __ cpuid();
794 __ cmpl(rax, CPUID_EXTENDED_FN_4);
795 __ jcc(Assembler::below, done);
796
797 //
798 // Extended cpuid(0x80000002) // first 16 bytes in brand string
799 //
800 __ movl(rax, CPUID_EXTENDED_FN_2);
801 __ cpuid();
802 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
803 __ movl(Address(rsi, 0), rax);
804 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
805 __ movl(Address(rsi, 0), rbx);
806 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
807 __ movl(Address(rsi, 0), rcx);
808 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
809 __ movl(Address(rsi,0), rdx);
810
811 //
812 // Extended cpuid(0x80000003) // next 16 bytes in brand string
813 //
814 __ movl(rax, CPUID_EXTENDED_FN_3);
815 __ cpuid();
816 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
817 __ movl(Address(rsi, 0), rax);
818 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
819 __ movl(Address(rsi, 0), rbx);
820 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
821 __ movl(Address(rsi, 0), rcx);
822 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
823 __ movl(Address(rsi,0), rdx);
824
825 //
826 // Extended cpuid(0x80000004) // last 16 bytes in brand string
827 //
828 __ movl(rax, CPUID_EXTENDED_FN_4);
829 __ cpuid();
830 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
831 __ movl(Address(rsi, 0), rax);
832 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
833 __ movl(Address(rsi, 0), rbx);
834 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
835 __ movl(Address(rsi, 0), rcx);
836 __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
837 __ movl(Address(rsi,0), rdx);
838
839 //
840 // return
841 //
842 __ bind(done);
843 __ popf();
844 __ pop(rsi);
845 __ pop(rbx);
846 __ pop(rbp);
847 __ ret(0);
848
849 # undef __
850
851 return start;
852 };
853 };
854
855 void VM_Version::get_processor_features() {
856
857 _cpu = 4; // 486 by default
858 _model = 0;
859 _stepping = 0;
860 _logical_processors_per_package = 1;
861 // i486 internal cache is both I&D and has a 16-byte line size
862 _L1_data_cache_line_size = 16;
863
864 // Get raw processor info
865
866 get_cpu_info_stub(&_cpuid_info);
867
868 assert_is_initialized();
869 _cpu = extended_cpu_family();
870 _model = extended_cpu_model();
871 _stepping = cpu_stepping();
872
873 if (cpu_family() > 4) { // it supports CPUID
874 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
875 _cpu_features = _features; // Preserve features
876 // Logical processors are only available on P4s and above,
877 // and only if hyperthreading is available.
878 _logical_processors_per_package = logical_processor_count();
879 _L1_data_cache_line_size = L1_line_size();
880 }
881
882 // xchg and xadd instructions
883 _supports_atomic_getset4 = true;
884 _supports_atomic_getadd4 = true;
885 _supports_atomic_getset8 = true;
886 _supports_atomic_getadd8 = true;
887
888 // assigning this field effectively enables Unsafe.writebackMemory()
889 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
890 // that is only implemented on x86_64 and only if the OS plays ball
891 if (os::supports_map_sync()) {
892 // publish data cache line flush size to generic field, otherwise
893 // let if default to zero thereby disabling writeback
894 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
895 }
896
897 // Check if processor has Intel Ecore
898 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
899 (supports_hybrid() ||
900 _model == 0xAF /* Xeon 6 E-cores (Sierra Forest) */ ||
901 _model == 0xDD /* Xeon 6+ E-cores (Clearwater Forest) */ )) {
902 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
903 }
904
905 if (UseSSE < 4) {
906 clear_feature(CPU_SSE4_1);
907 clear_feature(CPU_SSE4_2);
908 }
909
910 if (UseSSE < 3) {
911 clear_feature(CPU_SSE3);
912 clear_feature(CPU_SSSE3);
913 clear_feature(CPU_SSE4A);
914 }
915
916 // ZX cpus specific settings
917 if (is_zx() && FLAG_IS_DEFAULT(UseAVX)) {
918 if (cpu_family() == 7) {
919 if (extended_cpu_model() == 0x5B || extended_cpu_model() == 0x6B) {
920 UseAVX = 1;
921 } else if (extended_cpu_model() == 0x1B || extended_cpu_model() == 0x3B) {
922 UseAVX = 0;
923 }
924 } else if (cpu_family() == 6) {
925 UseAVX = 0;
926 }
927 }
928
929 // UseSSE is set to the smaller of what hardware supports and what
930 // the command line requires. i.e., you cannot set UseSSE to 4 on
931 // older systems which do not support it.
932 int use_sse_limit = 2;
933 if (UseSSE > 3 && supports_sse4_1()) {
934 use_sse_limit = 4;
935 } else if (UseSSE > 2 && supports_sse3()) {
936 use_sse_limit = 3;
937 }
938 if (FLAG_IS_DEFAULT(UseSSE)) {
939 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
940 } else if (UseSSE > use_sse_limit) {
941 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
942 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
943 }
944
945 // first try initial setting and detect what we can support
946 int use_avx_limit = 0;
947 if (UseAVX > 0) {
948 if (UseSSE < 4) {
949 // Don't use AVX if SSE is unavailable or has been disabled.
950 use_avx_limit = 0;
951 } else if (UseAVX > 2 && supports_evex()) {
952 use_avx_limit = 3;
953 } else if (UseAVX > 1 && supports_avx2()) {
954 use_avx_limit = 2;
955 } else if (UseAVX > 0 && supports_avx()) {
956 use_avx_limit = 1;
957 } else {
958 use_avx_limit = 0;
959 }
960 }
961 if (FLAG_IS_DEFAULT(UseAVX)) {
962 // Don't use AVX-512 on older Skylakes unless explicitly requested.
963 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
964 FLAG_SET_DEFAULT(UseAVX, 2);
965 } else {
966 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
967 }
968 }
969
970 if (UseAVX > use_avx_limit) {
971 if (UseSSE < 4) {
972 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
973 } else {
974 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
975 }
976 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
977 }
978
979 if (UseAVX < 3) {
980 clear_feature(CPU_AVX512F);
981 clear_feature(CPU_AVX512DQ);
982 clear_feature(CPU_AVX512CD);
983 clear_feature(CPU_AVX512BW);
984 clear_feature(CPU_AVX512ER);
985 clear_feature(CPU_AVX512PF);
986 clear_feature(CPU_AVX512VL);
987 clear_feature(CPU_AVX512_VPOPCNTDQ);
988 clear_feature(CPU_AVX512_VPCLMULQDQ);
989 clear_feature(CPU_AVX512_VAES);
990 clear_feature(CPU_AVX512_VNNI);
991 clear_feature(CPU_AVX512_VBMI);
992 clear_feature(CPU_AVX512_VBMI2);
993 clear_feature(CPU_AVX512_BITALG);
994 clear_feature(CPU_AVX512_IFMA);
995 clear_feature(CPU_APX_F);
996 clear_feature(CPU_AVX512_FP16);
997 clear_feature(CPU_AVX10_1);
998 clear_feature(CPU_AVX10_2);
999 }
1000
1001
1002 if (UseAVX < 2) {
1003 clear_feature(CPU_AVX2);
1004 clear_feature(CPU_AVX_IFMA);
1005 }
1006
1007 if (UseAVX < 1) {
1008 clear_feature(CPU_AVX);
1009 clear_feature(CPU_VZEROUPPER);
1010 clear_feature(CPU_F16C);
1011 clear_feature(CPU_SHA512);
1012 }
1013
1014 if (logical_processors_per_package() == 1) {
1015 // HT processor could be installed on a system which doesn't support HT.
1016 clear_feature(CPU_HT);
1017 }
1018
1019 if (is_intel()) { // Intel cpus specific settings
1020 if (is_knights_family()) {
1021 clear_feature(CPU_VZEROUPPER);
1022 clear_feature(CPU_AVX512BW);
1023 clear_feature(CPU_AVX512VL);
1024 clear_feature(CPU_APX_F);
1025 clear_feature(CPU_AVX512DQ);
1026 clear_feature(CPU_AVX512_VNNI);
1027 clear_feature(CPU_AVX512_VAES);
1028 clear_feature(CPU_AVX512_VPOPCNTDQ);
1029 clear_feature(CPU_AVX512_VPCLMULQDQ);
1030 clear_feature(CPU_AVX512_VBMI);
1031 clear_feature(CPU_AVX512_VBMI2);
1032 clear_feature(CPU_CLWB);
1033 clear_feature(CPU_FLUSHOPT);
1034 clear_feature(CPU_GFNI);
1035 clear_feature(CPU_AVX512_BITALG);
1036 clear_feature(CPU_AVX512_IFMA);
1037 clear_feature(CPU_AVX_IFMA);
1038 clear_feature(CPU_AVX512_FP16);
1039 clear_feature(CPU_AVX10_1);
1040 clear_feature(CPU_AVX10_2);
1041 }
1042 }
1043
1044 // Currently APX support is only enabled for targets supporting AVX512VL feature.
1045 if (supports_apx_f() && os_supports_apx_egprs() && supports_avx512vl()) {
1046 if (FLAG_IS_DEFAULT(UseAPX)) {
1047 FLAG_SET_DEFAULT(UseAPX, false); // by default UseAPX is false
1048 clear_feature(CPU_APX_F);
1049 } else if (!UseAPX) {
1050 clear_feature(CPU_APX_F);
1051 }
1052 } else {
1053 if (!os_supports_apx_egprs() || !supports_avx512vl()) {
1054 clear_feature(CPU_APX_F);
1055 }
1056 if (UseAPX) {
1057 if (!FLAG_IS_DEFAULT(UseAPX)) {
1058 warning("APX instructions are not available on this CPU");
1059 }
1060 FLAG_SET_DEFAULT(UseAPX, false);
1061 }
1062 }
1063
1064 CHECK_CPU_FEATURE(UseCLMUL, CLMUL, supports_clmul(), "CLMUL" MULTI_INST_WARNING_MSG);
1065 CHECK_CPU_FEATURE(UseAES, AES, supports_aes(), "AES" MULTI_INST_WARNING_MSG);
1066 CHECK_CPU_FEATURE(UseFMA, FMA, supports_fma(), "FMA" MULTI_INST_WARNING_MSG);
1067 CHECK_CPU_FEATURE(UseCountLeadingZerosInstruction, LZCNT, supports_lzcnt(), "lzcnt" SINGLE_INST_WARNING_MSG);
1068 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1069 // VEX prefix is generated only when AVX > 0.
1070 CHECK_CPU_FEATURE(UseBMI1Instructions, BMI1, supports_bmi1(), "BMI1" MULTI_INST_WARNING_MSG);
1071
1072 if (supports_bmi2() && supports_avx()) {
1073 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1074 FLAG_SET_DEFAULT(UseBMI2Instructions, true);
1075 } else if (!UseBMI2Instructions) {
1076 clear_feature(CPU_BMI2);
1077 }
1078 } else {
1079 if (!supports_avx()) {
1080 clear_feature(CPU_BMI2);
1081 }
1082 if (UseBMI2Instructions) {
1083 if (!FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1084 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1085 }
1086 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1087 }
1088 }
1089
1090 CHECK_CPU_FEATURE(UsePopCountInstruction, POPCNT, supports_popcnt(), "popcnt" SINGLE_INST_WARNING_MSG);
1091 CHECK_CPU_FEATURE(UseSHA, SHA, supports_sha() || (supports_avx2() && supports_bmi2()), "SHA" MULTI_INST_WARNING_MSG);
1092
1093 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1094 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1095 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1096 } else {
1097 _has_intel_jcc_erratum = IntelJccErratumMitigation;
1098 }
1099
1100 if (X86ICacheSync == -1) {
1101 // Auto-detect, choosing the best performant one that still flushes
1102 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1103 if (supports_clwb()) {
1104 FLAG_SET_ERGO(X86ICacheSync, 3);
1105 } else if (supports_clflushopt()) {
1106 FLAG_SET_ERGO(X86ICacheSync, 2);
1107 } else {
1108 FLAG_SET_ERGO(X86ICacheSync, 1);
1109 }
1110 } else {
1111 if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1112 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1113 }
1114 if ((X86ICacheSync == 3) && !supports_clwb()) {
1115 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1116 }
1117 if ((X86ICacheSync == 5) && !supports_serialize()) {
1118 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1119 }
1120 }
1121
1122 stringStream ss(2048);
1123 if (supports_hybrid()) {
1124 ss.print("(hybrid)");
1125 } else {
1126 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1127 }
1128 ss.print(" family %d model %d stepping %d microcode 0x%x",
1129 cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1130 ss.print(", ");
1131 int features_offset = (int)ss.size();
1132 insert_features_names(_features, ss);
1133
1134 _cpu_info_string = ss.as_string(true);
1135 _features_string = _cpu_info_string + features_offset;
1136
1137 // Use AES instructions if available.
1138 if (supports_aes()) {
1139 if (supports_sse3()) {
1140 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1141 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1142 }
1143 } else if (UseAESIntrinsics) {
1144 // The AES intrinsic stubs require AES instruction support (of course)
1145 // but also require sse3 mode or higher for instructions it use.
1146 if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1147 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1148 }
1149 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1150 }
1151 if (!UseAESIntrinsics) {
1152 if (UseAESCTRIntrinsics) {
1153 if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1154 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1155 }
1156 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1157 }
1158 } else {
1159 if (supports_sse4_1()) {
1160 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1161 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1162 }
1163 } else if (UseAESCTRIntrinsics) {
1164 // The AES-CTR intrinsic stubs require AES instruction support (of course)
1165 // but also require sse4.1 mode or higher for instructions it use.
1166 if (!FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1167 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1168 }
1169 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1170 }
1171 }
1172 } else {
1173 if (!cpu_supports_aes()) {
1174 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1175 warning("AES intrinsics are not available on this CPU");
1176 }
1177 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1178 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1179 warning("AES-CTR intrinsics are not available on this CPU");
1180 }
1181 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1182 } else if (!UseAES) {
1183 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1184 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1185 }
1186 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1187 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1188 warning("AES_CTR intrinsics require UseAES flag to be enabled. AES_CTR intrinsics will be disabled.");
1189 }
1190 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1191 }
1192 }
1193
1194 if (UseCLMUL && (UseSSE > 2)) {
1195 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1196 UseCRC32Intrinsics = true;
1197 }
1198 } else if (UseCRC32Intrinsics) {
1199 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1200 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1201 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1202 }
1203
1204 if (supports_avx2()) {
1205 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1206 UseAdler32Intrinsics = true;
1207 }
1208 } else if (UseAdler32Intrinsics) {
1209 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1210 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1211 }
1212 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1213 }
1214
1215 if (supports_sse4_2() && supports_clmul()) {
1216 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1217 UseCRC32CIntrinsics = true;
1218 }
1219 } else if (UseCRC32CIntrinsics) {
1220 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1221 warning("CRC32C intrinsics are not available on this CPU");
1222 }
1223 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1224 }
1225
1226 // GHASH/GCM intrinsics
1227 if (UseCLMUL && (UseSSE > 2)) {
1228 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1229 UseGHASHIntrinsics = true;
1230 }
1231 } else if (UseGHASHIntrinsics) {
1232 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1233 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1234 }
1235 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1236 }
1237
1238 // ChaCha20 Intrinsics
1239 // As long as the system supports AVX as a baseline we can do a
1240 // SIMD-enabled block function. StubGenerator makes the determination
1241 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1242 // version.
1243 if (UseAVX >= 1) {
1244 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1245 UseChaCha20Intrinsics = true;
1246 }
1247 } else if (UseChaCha20Intrinsics) {
1248 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1249 warning("ChaCha20 intrinsic requires AVX instructions");
1250 }
1251 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1252 }
1253
1254 // Kyber Intrinsics
1255 // Currently we only have them for AVX512
1256 if (supports_evex() && supports_avx512bw()) {
1257 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1258 UseKyberIntrinsics = true;
1259 }
1260 } else if (UseKyberIntrinsics) {
1261 if (!FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1262 warning("Intrinsics for ML-KEM are not available on this CPU.");
1263 }
1264 FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1265 }
1266
1267 // Dilithium Intrinsics
1268 if (UseAVX > 1) {
1269 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1270 UseDilithiumIntrinsics = true;
1271 }
1272 } else if (UseDilithiumIntrinsics) {
1273 if (!FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1274 warning("Intrinsics for ML-DSA are not available on this CPU.");
1275 }
1276 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1277 }
1278
1279 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1280 if (UseAVX >= 2) {
1281 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1282 UseBASE64Intrinsics = true;
1283 }
1284 } else if (UseBASE64Intrinsics) {
1285 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1286 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1287 }
1288 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1289 }
1290
1291 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1292 UseMD5Intrinsics = true;
1293 }
1294
1295 if (supports_sha() && supports_sse4_1() && UseSHA) {
1296 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1297 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1298 }
1299 } else if (UseSHA1Intrinsics) {
1300 if (!FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1301 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1302 }
1303 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1304 }
1305
1306 if (supports_sse4_1() && UseSHA) {
1307 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1308 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1309 }
1310 } else if (UseSHA256Intrinsics) {
1311 if (!FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1312 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1313 }
1314 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1315 }
1316
1317 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1318 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1319 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1320 }
1321 } else if (UseSHA512Intrinsics) {
1322 if (!FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1323 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1324 }
1325 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1326 }
1327
1328 if (UseSHA && supports_evex() && supports_avx512bw()) {
1329 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1330 FLAG_SET_DEFAULT(UseSHA3Intrinsics, true);
1331 }
1332 } else if (UseSHA3Intrinsics) {
1333 if (!FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1334 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1335 }
1336 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1337 }
1338
1339 #if COMPILER2_OR_JVMCI
1340 int max_vector_size = 0;
1341 if (UseAVX == 0 || !os_supports_avx_vectors()) {
1342 // 16 byte vectors (in XMM) are supported with SSE2+
1343 max_vector_size = 16;
1344 } else if (UseAVX == 1 || UseAVX == 2) {
1345 // 32 bytes vectors (in YMM) are only supported with AVX+
1346 max_vector_size = 32;
1347 } else if (UseAVX > 2) {
1348 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1349 max_vector_size = 64;
1350 }
1351
1352 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1353
1354 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1355 if (MaxVectorSize < min_vector_size) {
1356 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1357 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1358 }
1359 if (MaxVectorSize > max_vector_size) {
1360 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1361 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1362 }
1363 if (!is_power_of_2(MaxVectorSize)) {
1364 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1365 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1366 }
1367 } else {
1368 // If default, use highest supported configuration
1369 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1370 }
1371
1372 #if defined(COMPILER2) && defined(ASSERT)
1373 if (MaxVectorSize > 0) {
1374 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1375 tty->print_cr("State of YMM registers after signal handle:");
1376 int nreg = 4;
1377 const char* ymm_name[4] = {"0", "7", "8", "15"};
1378 for (int i = 0; i < nreg; i++) {
1379 tty->print("YMM%s:", ymm_name[i]);
1380 for (int j = 7; j >=0; j--) {
1381 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1382 }
1383 tty->cr();
1384 }
1385 }
1386 }
1387 #endif // COMPILER2 && ASSERT
1388
1389 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1390 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1391 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1392 }
1393 } else if (UsePoly1305Intrinsics) {
1394 if (!FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1395 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1396 }
1397 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1398 }
1399
1400 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1401 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1402 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1403 }
1404 } else if (UseIntPolyIntrinsics) {
1405 if (!FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1406 warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1407 }
1408 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1409 }
1410
1411 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1412 UseMultiplyToLenIntrinsic = true;
1413 }
1414 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1415 UseSquareToLenIntrinsic = true;
1416 }
1417 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1418 UseMulAddIntrinsic = true;
1419 }
1420 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1421 UseMontgomeryMultiplyIntrinsic = true;
1422 }
1423 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1424 UseMontgomerySquareIntrinsic = true;
1425 }
1426 #endif // COMPILER2_OR_JVMCI
1427
1428 // On new cpus instructions which update whole XMM register should be used
1429 // to prevent partial register stall due to dependencies on high half.
1430 //
1431 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1432 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1433 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1434 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1435
1436
1437 if (is_zx()) { // ZX cpus specific settings
1438 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1439 UseStoreImmI16 = false; // don't use it on ZX cpus
1440 }
1441 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1442 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1443 // Use it on all ZX cpus
1444 UseAddressNop = true;
1445 }
1446 }
1447 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1448 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1449 }
1450 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1451 if (supports_sse3()) {
1452 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1453 } else {
1454 UseXmmRegToRegMoveAll = false;
1455 }
1456 }
1457 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1458 #ifdef COMPILER2
1459 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1460 // For new ZX cpus do the next optimization:
1461 // don't align the beginning of a loop if there are enough instructions
1462 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1463 // in current fetch line (OptoLoopAlignment) or the padding
1464 // is big (> MaxLoopPad).
1465 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1466 // generated NOP instructions. 11 is the largest size of one
1467 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1468 MaxLoopPad = 11;
1469 }
1470 #endif // COMPILER2
1471 if (supports_sse4_2()) { // new ZX cpus
1472 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1473 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1474 }
1475 }
1476 }
1477
1478 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1479 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1480 }
1481 }
1482
1483 if (is_amd_family()) { // AMD cpus specific settings
1484 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1485 // Use it on new AMD cpus starting from Opteron.
1486 UseAddressNop = true;
1487 }
1488 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1489 if (supports_sse4a()) {
1490 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1491 } else {
1492 UseXmmLoadAndClearUpper = false;
1493 }
1494 }
1495 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1496 if (supports_sse4a()) {
1497 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1498 } else {
1499 UseXmmRegToRegMoveAll = false;
1500 }
1501 }
1502 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1503 if (supports_sse4a()) {
1504 UseXmmI2F = true;
1505 } else {
1506 UseXmmI2F = false;
1507 }
1508 }
1509 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1510 if (supports_sse4a()) {
1511 UseXmmI2D = true;
1512 } else {
1513 UseXmmI2D = false;
1514 }
1515 }
1516
1517 // some defaults for AMD family 15h
1518 if (cpu_family() == 0x15) {
1519 // On family 15h processors default is no sw prefetch
1520 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1521 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1522 }
1523 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1524 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1525 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1526 }
1527 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1528 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1529 }
1530 }
1531
1532 #ifdef COMPILER2
1533 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1534 // Limit vectors size to 16 bytes on AMD cpus < 17h.
1535 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1536 }
1537 #endif // COMPILER2
1538
1539 // Some defaults for AMD family >= 17h && Hygon family 18h
1540 if (cpu_family() >= 0x17) {
1541 // On family >=17h processors use XMM and UnalignedLoadStores
1542 // for Array Copy
1543 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1544 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1545 }
1546 #ifdef COMPILER2
1547 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1548 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1549 }
1550 #endif
1551 }
1552 }
1553
1554 if (is_intel()) { // Intel cpus specific settings
1555 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1556 UseStoreImmI16 = false; // don't use it on Intel cpus
1557 }
1558 if (is_intel_server_family() || cpu_family() == 15) {
1559 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1560 // Use it on all Intel cpus starting from PentiumPro
1561 UseAddressNop = true;
1562 }
1563 }
1564 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1565 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1566 }
1567 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1568 if (supports_sse3()) {
1569 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1570 } else {
1571 UseXmmRegToRegMoveAll = false;
1572 }
1573 }
1574 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1575 #ifdef COMPILER2
1576 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1577 // For new Intel cpus do the next optimization:
1578 // don't align the beginning of a loop if there are enough instructions
1579 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1580 // in current fetch line (OptoLoopAlignment) or the padding
1581 // is big (> MaxLoopPad).
1582 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1583 // generated NOP instructions. 11 is the largest size of one
1584 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1585 MaxLoopPad = 11;
1586 }
1587 #endif // COMPILER2
1588
1589 if (is_intel_modern_cpu()) { // Newest Intel cpus
1590 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1591 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1592 }
1593 }
1594 }
1595 if (is_atom_family() || is_knights_family()) {
1596 #ifdef COMPILER2
1597 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1598 OptoScheduling = true;
1599 }
1600 #endif
1601 if (supports_sse4_2()) { // Silvermont
1602 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1603 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1604 }
1605 }
1606 if (FLAG_IS_DEFAULT(UseIncDec)) {
1607 FLAG_SET_DEFAULT(UseIncDec, false);
1608 }
1609 }
1610 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1611 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1612 }
1613 }
1614
1615 #ifdef COMPILER2
1616 if (UseAVX > 2) {
1617 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1618 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1619 ArrayOperationPartialInlineSize != 0 &&
1620 ArrayOperationPartialInlineSize != 16 &&
1621 ArrayOperationPartialInlineSize != 32 &&
1622 ArrayOperationPartialInlineSize != 64)) {
1623 int inline_size = 0;
1624 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1625 inline_size = 64;
1626 } else if (MaxVectorSize >= 32) {
1627 inline_size = 32;
1628 } else if (MaxVectorSize >= 16) {
1629 inline_size = 16;
1630 }
1631 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1632 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1633 }
1634 ArrayOperationPartialInlineSize = inline_size;
1635 }
1636
1637 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1638 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1639 if (ArrayOperationPartialInlineSize) {
1640 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1641 } else {
1642 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1643 }
1644 }
1645 }
1646
1647 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1648 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1649 OptimizeFill = false;
1650 }
1651 }
1652 #endif
1653 if (supports_sse4_2()) {
1654 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1655 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1656 }
1657 } else if (UseSSE42Intrinsics) {
1658 if (!FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1659 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1660 }
1661 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1662 }
1663 if (UseSSE42Intrinsics) {
1664 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1665 UseVectorizedMismatchIntrinsic = true;
1666 }
1667 } else if (UseVectorizedMismatchIntrinsic) {
1668 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1669 warning("vectorizedMismatch intrinsics are not available on this CPU");
1670 }
1671 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1672 }
1673 if (UseAVX >= 2) {
1674 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1675 } else if (UseVectorizedHashCodeIntrinsic) {
1676 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic)) {
1677 warning("vectorizedHashCode intrinsics are not available on this CPU");
1678 }
1679 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1680 }
1681
1682 // Use count trailing zeros instruction if available
1683 if (supports_bmi1()) {
1684 // tzcnt does not require VEX prefix
1685 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1686 UseCountTrailingZerosInstruction = true;
1687 }
1688 } else if (UseCountTrailingZerosInstruction) {
1689 if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1690 warning("tzcnt instruction is not available on this CPU");
1691 }
1692 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1693 }
1694
1695 // Use fast-string operations if available.
1696 if (supports_erms()) {
1697 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1698 UseFastStosb = true;
1699 }
1700 } else if (UseFastStosb) {
1701 if (!FLAG_IS_DEFAULT(UseFastStosb)) {
1702 warning("fast-string operations are not available on this CPU");
1703 }
1704 FLAG_SET_DEFAULT(UseFastStosb, false);
1705 }
1706
1707 // For AMD Processors use XMM/YMM MOVDQU instructions
1708 // for Object Initialization as default
1709 if (is_amd() && cpu_family() >= 0x19) {
1710 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1711 UseFastStosb = false;
1712 }
1713 }
1714
1715 #ifdef COMPILER2
1716 if (is_intel() && MaxVectorSize > 16) {
1717 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1718 UseFastStosb = false;
1719 }
1720 }
1721 #endif
1722
1723 // Use XMM/YMM MOVDQU instruction for Object Initialization
1724 if (!UseFastStosb && UseUnalignedLoadStores) {
1725 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1726 UseXMMForObjInit = true;
1727 }
1728 } else if (UseXMMForObjInit) {
1729 if (!FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1730 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1731 }
1732 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1733 }
1734
1735 #ifdef COMPILER2
1736 if (FLAG_IS_DEFAULT(AlignVector)) {
1737 // Modern processors allow misaligned memory operations for vectors.
1738 AlignVector = !UseUnalignedLoadStores;
1739 }
1740 #endif // COMPILER2
1741
1742 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1743 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1744 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1745 }
1746 }
1747
1748 // Allocation prefetch settings
1749 int cache_line_size = checked_cast<int>(prefetch_data_size());
1750 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1751 (cache_line_size > AllocatePrefetchStepSize)) {
1752 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1753 }
1754
1755 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1756 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1757 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1758 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1759 }
1760 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1761 }
1762
1763 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1764 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1765 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1766 }
1767
1768 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1769 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1770 is_intel_modern_cpu()) { // Nehalem based cpus
1771 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1772 }
1773 #ifdef COMPILER2
1774 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1775 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1776 }
1777 #endif
1778 }
1779
1780 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1781 #ifdef COMPILER2
1782 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1783 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1784 }
1785 #endif
1786 }
1787
1788 // Prefetch settings
1789
1790 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1791 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1792 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1793 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1794
1795 // gc copy/scan is disabled if prefetchw isn't supported, because
1796 // Prefetch::write emits an inlined prefetchw on Linux.
1797 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1798 // The used prefetcht0 instruction works for both amd64 and em64t.
1799
1800 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1801 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1802 }
1803 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1804 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1805 }
1806
1807 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1808 (cache_line_size > ContendedPaddingWidth))
1809 ContendedPaddingWidth = cache_line_size;
1810
1811 // This machine allows unaligned memory accesses
1812 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1813 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1814 }
1815
1816 #ifndef PRODUCT
1817 if (log_is_enabled(Info, os, cpu)) {
1818 LogStream ls(Log(os, cpu)::info());
1819 outputStream* log = &ls;
1820 log->print_cr("Logical CPUs per core: %u",
1821 logical_processors_per_package());
1822 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1823 log->print("UseSSE=%d", UseSSE);
1824 if (UseAVX > 0) {
1825 log->print(" UseAVX=%d", UseAVX);
1826 }
1827 if (UseAES) {
1828 log->print(" UseAES=1");
1829 }
1830 #ifdef COMPILER2
1831 if (MaxVectorSize > 0) {
1832 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1833 }
1834 #endif
1835 log->cr();
1836 log->print("Allocation");
1837 if (AllocatePrefetchStyle <= 0) {
1838 log->print_cr(": no prefetching");
1839 } else {
1840 log->print(" prefetching: ");
1841 if (AllocatePrefetchInstr == 0) {
1842 log->print("PREFETCHNTA");
1843 } else if (AllocatePrefetchInstr == 1) {
1844 log->print("PREFETCHT0");
1845 } else if (AllocatePrefetchInstr == 2) {
1846 log->print("PREFETCHT2");
1847 } else if (AllocatePrefetchInstr == 3) {
1848 log->print("PREFETCHW");
1849 }
1850 if (AllocatePrefetchLines > 1) {
1851 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1852 } else {
1853 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1854 }
1855 }
1856
1857 if (PrefetchCopyIntervalInBytes > 0) {
1858 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1859 }
1860 if (PrefetchScanIntervalInBytes > 0) {
1861 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1862 }
1863 if (ContendedPaddingWidth > 0) {
1864 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1865 }
1866 }
1867 #endif // !PRODUCT
1868 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1869 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1870 }
1871 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1872 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1873 }
1874 // CopyAVX3Threshold is the threshold at which 64-byte vector instructions
1875 // are used for implementing the array copy, fill and clear operations.
1876 // The Intel platforms that support the serialize instruction and the AMD
1877 // platforms with native 512-bit datapath have improved implementation of
1878 // 64-byte load/stores and so the default threshold is set to 0 for these
1879 // platforms.
1880 if (FLAG_IS_DEFAULT(CopyAVX3Threshold)) {
1881 if (is_intel() && is_intel_server_family() && supports_serialize()) {
1882 FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1883 } else if (is_amd() && is_amd_avx512_datapath_server_family()) {
1884 FLAG_SET_DEFAULT(CopyAVX3Threshold, 0);
1885 } else {
1886 FLAG_SET_DEFAULT(CopyAVX3Threshold, AVX3Threshold);
1887 }
1888 }
1889 }
1890
1891 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1892 VirtualizationType vrt = VM_Version::get_detected_virtualization();
1893 if (vrt == XenHVM) {
1894 st->print_cr("Xen hardware-assisted virtualization detected");
1895 } else if (vrt == KVM) {
1896 st->print_cr("KVM virtualization detected");
1897 } else if (vrt == VMWare) {
1898 st->print_cr("VMWare virtualization detected");
1899 VirtualizationSupport::print_virtualization_info(st);
1900 } else if (vrt == HyperV) {
1901 st->print_cr("Hyper-V virtualization detected");
1902 } else if (vrt == HyperVRole) {
1903 st->print_cr("Hyper-V role detected");
1904 }
1905 }
1906
1907 bool VM_Version::compute_has_intel_jcc_erratum() {
1908 if (!is_intel_family_core()) {
1909 // Only Intel CPUs are affected.
1910 return false;
1911 }
1912 // The following table of affected CPUs is based on the following document released by Intel:
1913 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1914 switch (_model) {
1915 case 0x8E:
1916 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1917 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1918 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1919 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1920 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1921 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1922 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1923 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1924 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1925 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1926 case 0x4E:
1927 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1928 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1929 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1930 return _stepping == 0x3;
1931 case 0x55:
1932 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1933 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1934 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1935 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1936 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1937 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1938 return _stepping == 0x4 || _stepping == 0x7;
1939 case 0x5E:
1940 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1941 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
1942 return _stepping == 0x3;
1943 case 0x9E:
1944 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
1945 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
1946 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
1947 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
1948 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
1949 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
1950 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
1951 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
1952 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
1953 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
1954 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
1955 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
1956 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
1957 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
1958 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
1959 case 0xA5:
1960 // Not in Intel documentation.
1961 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
1962 return true;
1963 case 0xA6:
1964 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
1965 return _stepping == 0x0;
1966 case 0xAE:
1967 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
1968 return _stepping == 0xA;
1969 default:
1970 // If we are running on another intel machine not recognized in the table, we are okay.
1971 return false;
1972 }
1973 }
1974
1975 // On Xen, the cpuid instruction returns
1976 // eax / registers[0]: Version of Xen
1977 // ebx / registers[1]: chars 'XenV'
1978 // ecx / registers[2]: chars 'MMXe'
1979 // edx / registers[3]: chars 'nVMM'
1980 //
1981 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
1982 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
1983 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
1984 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
1985 //
1986 // more information :
1987 // https://kb.vmware.com/s/article/1009458
1988 //
1989 void VM_Version::check_virtualizations() {
1990 uint32_t registers[4] = {0};
1991 char signature[13] = {0};
1992
1993 // Xen cpuid leaves can be found 0x100 aligned boundary starting
1994 // from 0x40000000 until 0x40010000.
1995 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
1996 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
1997 detect_virt_stub(leaf, registers);
1998 memcpy(signature, ®isters[1], 12);
1999
2000 if (strncmp("VMwareVMware", signature, 12) == 0) {
2001 Abstract_VM_Version::_detected_virtualization = VMWare;
2002 // check for extended metrics from guestlib
2003 VirtualizationSupport::initialize();
2004 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2005 Abstract_VM_Version::_detected_virtualization = HyperV;
2006 #ifdef _WINDOWS
2007 // CPUID leaf 0x40000007 is available to the root partition only.
2008 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2009 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2010 detect_virt_stub(0x40000007, registers);
2011 if ((registers[0] != 0x0) ||
2012 (registers[1] != 0x0) ||
2013 (registers[2] != 0x0) ||
2014 (registers[3] != 0x0)) {
2015 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2016 }
2017 #endif
2018 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2019 Abstract_VM_Version::_detected_virtualization = KVM;
2020 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2021 Abstract_VM_Version::_detected_virtualization = XenHVM;
2022 }
2023 }
2024 }
2025
2026 #ifdef COMPILER2
2027 // Determine if it's running on Cascade Lake using default options.
2028 bool VM_Version::is_default_intel_cascade_lake() {
2029 return FLAG_IS_DEFAULT(UseAVX) &&
2030 FLAG_IS_DEFAULT(MaxVectorSize) &&
2031 UseAVX > 2 &&
2032 is_intel_cascade_lake();
2033 }
2034 #endif
2035
2036 bool VM_Version::is_intel_cascade_lake() {
2037 return is_intel_skylake() && _stepping >= 5;
2038 }
2039
2040 bool VM_Version::is_intel_darkmont() {
2041 return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2042 }
2043
2044 void VM_Version::clear_apx_test_state() {
2045 clear_apx_test_state_stub();
2046 }
2047
2048 static bool _vm_version_initialized = false;
2049
2050 void VM_Version::initialize() {
2051 ResourceMark rm;
2052
2053 // Making this stub must be FIRST use of assembler
2054 stub_blob = BufferBlob::create("VM_Version stub", stub_size);
2055 if (stub_blob == nullptr) {
2056 vm_exit_during_initialization("Unable to allocate stub for VM_Version");
2057 }
2058 CodeBuffer c(stub_blob);
2059 VM_Version_StubGenerator g(&c);
2060
2061 get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
2062 g.generate_get_cpu_info());
2063 detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
2064 g.generate_detect_virt());
2065 clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
2066 g.clear_apx_test_state());
2067 getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
2068 g.generate_getCPUIDBrandString());
2069 get_processor_features();
2070
2071 Assembler::precompute_instructions();
2072
2073 if (VM_Version::supports_hv()) { // Supports hypervisor
2074 check_virtualizations();
2075 }
2076 _vm_version_initialized = true;
2077 }
2078
// CPU family identifiers. This file compares them against the values
// returned by cpu_family() and extended_cpu_family().
enum FamilyFlag {
  CPU_FAMILY_8086_8088  = 0x0,
  CPU_FAMILY_INTEL_286  = 0x2,
  CPU_FAMILY_INTEL_386  = 0x3,
  CPU_FAMILY_INTEL_486  = 0x4,
  CPU_FAMILY_PENTIUM    = 0x5,
  CPU_FAMILY_PENTIUMPRO = 0x6,  // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
};
2088
// Single-bit masks; INTEL64_FLAG is tested against
// _cpuid_info.ext_cpuid1_edx.value in cpu_is_em64t().
enum _featureExtendedEdxFlag {
  RDTSCP_FLAG  = 1 << 27,  // 0x08000000
  INTEL64_FLAG = 1 << 29   // 0x20000000
};
2093
// Single-bit masks for _cpuid_info.std_cpuid1_edx.value, written as
// "1 << bit position". Bits 10 and 20 have no enumerator.
enum FeatureEdxFlag {
  FPU_FLAG     = 1 << 0,
  VME_FLAG     = 1 << 1,
  DE_FLAG      = 1 << 2,
  PSE_FLAG     = 1 << 3,
  TSC_FLAG     = 1 << 4,
  MSR_FLAG     = 1 << 5,
  PAE_FLAG     = 1 << 6,
  MCE_FLAG     = 1 << 7,
  CX8_FLAG     = 1 << 8,
  APIC_FLAG    = 1 << 9,
  SEP_FLAG     = 1 << 11,
  MTRR_FLAG    = 1 << 12,
  PGE_FLAG     = 1 << 13,
  MCA_FLAG     = 1 << 14,
  CMOV_FLAG    = 1 << 15,
  PAT_FLAG     = 1 << 16,
  PSE36_FLAG   = 1 << 17,
  PSNUM_FLAG   = 1 << 18,
  CLFLUSH_FLAG = 1 << 19,
  DTS_FLAG     = 1 << 21,
  ACPI_FLAG    = 1 << 22,
  MMX_FLAG     = 1 << 23,
  FXSR_FLAG    = 1 << 24,
  SSE_FLAG     = 1 << 25,
  SSE2_FLAG    = 1 << 26,
  SS_FLAG      = 1 << 27,
  HTT_FLAG     = 1 << 28,
  TM_FLAG      = 1 << 29
};
2124
// VM_Version statics
// Element counts of the _family_id_intel and _family_id_amd name tables;
// cpu_family_description() uses them as the index bound.
enum {
  ExtendedFamilyIdLength_INTEL = 0x10,  // 16 entries
  ExtendedFamilyIdLength_AMD   = 0x18   // 24 entries
};
2130
// Size of the vendor id buffer in cpu_detailed_description(): three 4-byte
// words plus the terminating '\0'.
const size_t VENDOR_LENGTH = (3 * 4) + 1;
// Size of the brand string buffer: the twelve 4-byte words written by
// cpu_extended_brand_string() plus one extra byte.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4) + 1;

// Brand string buffer, allocated on first use by cpu_brand_string().
static char* _cpu_brand_string = nullptr;
// Cached result of maximum_qualified_cpu_frequency(); 0 means "not computed".
static int64_t _max_qualified_cpu_frequency = 0;

static int _no_of_threads = 0;
static int _no_of_cores = 0;
2138
2139 const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
2140 "8086/8088",
2141 "",
2142 "286",
2143 "386",
2144 "486",
2145 "Pentium",
2146 "Pentium Pro", //or Pentium-M/Woodcrest depending on model
2147 "",
2148 "",
2149 "",
2150 "",
2151 "",
2152 "",
2153 "",
2154 "",
2155 "Pentium 4"
2156 };
2157
2158 const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
2159 "",
2160 "",
2161 "",
2162 "",
2163 "5x86",
2164 "K5/K6",
2165 "Athlon/AthlonXP",
2166 "",
2167 "",
2168 "",
2169 "",
2170 "",
2171 "",
2172 "",
2173 "",
2174 "Opteron/Athlon64",
2175 "Opteron QC/Phenom", // Barcelona et.al.
2176 "",
2177 "",
2178 "",
2179 "",
2180 "",
2181 "",
2182 "Zen"
2183 };
// Model names for family 6 (CPU_FAMILY_PENTIUMPRO), indexed by the model
// number that cpu_model_description() obtains from extended_cpu_model().
// The trailing nullptr is the end-of-table sentinel that
// cpu_model_description() stops at.
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  /* 0x00 */ "",
  /* 0x01 */ "Pentium Pro",
  /* 0x02 */ "",
  /* 0x03 */ "Pentium II model 3",
  /* 0x04 */ "",
  /* 0x05 */ "Pentium II model 5/Xeon/Celeron",
  /* 0x06 */ "Celeron",
  /* 0x07 */ "Pentium III/Pentium III Xeon",
  /* 0x08 */ "Pentium III/Pentium III Xeon",
  /* 0x09 */ "Pentium M model 9",    // Yonah
  /* 0x0a */ "Pentium III, model A",
  /* 0x0b */ "Pentium III, model B",
  /* 0x0c */ "",
  /* 0x0d */ "Pentium M model D",    // Dothan
  /* 0x0e */ "",
  /* 0x0f */ "Core 2",               // Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  /* 0x10 - 0x15 */ "", "", "", "", "", "",
  /* 0x16 */ "Celeron",              // Celeron 65nm
  /* 0x17 */ "Core 2",               // Penryn / Harpertown
  /* 0x18 - 0x19 */ "", "",
  /* 0x1a */ "Core i7",              // CPU_MODEL_NEHALEM_EP
  /* 0x1b */ "Atom",                 // Z5xx series Silverthorn
  /* 0x1c */ "",
  /* 0x1d */ "Core 2",               // Dunnington (6-core)
  /* 0x1e */ "Nehalem",              // CPU_MODEL_NEHALEM
  /* 0x1f - 0x24 */ "", "", "", "", "", "",
  /* 0x25 */ "Westmere",             // CPU_MODEL_WESTMERE
  /* 0x26 - 0x29 */ "", "", "", "",
  /* 0x2a */ "Sandy Bridge",         // "2nd Generation Intel Core i7, i5, i3"
  /* 0x2b */ "",
  /* 0x2c */ "Westmere-EP",          // CPU_MODEL_WESTMERE_EP
  /* 0x2d */ "Sandy Bridge-EP",      // CPU_MODEL_SANDYBRIDGE_EP
  /* 0x2e */ "Nehalem-EX",           // CPU_MODEL_NEHALEM_EX
  /* 0x2f */ "Westmere-EX",          // CPU_MODEL_WESTMERE_EX
  /* 0x30 - 0x39 */ "", "", "", "", "", "", "", "", "", "",
  /* 0x3a */ "Ivy Bridge",
  /* 0x3b */ "",
  /* 0x3c */ "Haswell",              // "4th Generation Intel Core Processor"
  /* 0x3d */ "",                     // "Next Generation Intel Core Processor"
  /* 0x3e */ "Ivy Bridge-EP",        // "Next Generation Intel Xeon Processor E7 Family"
  /* 0x3f */ "",                     // "Future Generation Intel Xeon Processor"
  /* 0x40 - 0x44 */ "", "", "", "", "",
  /* 0x45 */ "Haswell",              // "4th Generation Intel Core Processor"
  /* 0x46 */ "Haswell",              // "4th Generation Intel Core Processor"
  nullptr
};
2260
/* Brand ID is for back compatibility.
 * Newer CPUs use the extended brand string.
 * Indexed by the brand number that cpu_brand() reads from the low byte of
 * CPUID leaf 1 ebx; the trailing nullptr ends the table. */
const char* const _brand_id[] = {
  /* 0 */ "",
  /* 1 */ "Celeron processor",
  /* 2 */ "Pentium III processor",
  /* 3 */ "Intel Pentium III Xeon processor",
  /* 4 - 7 */ "", "", "", "",
  /* 8 */ "Intel Pentium 4 processor",
  nullptr
};
2275
2276
// Names for the bits of _cpuid_info.std_cpuid1_edx, indexed by bit position.
// cpu_write_support_string() skips entries that are empty strings.
const char* const _feature_edx_id[] = {
  /*  0 */ "On-Chip FPU",
  /*  1 */ "Virtual Mode Extensions",
  /*  2 */ "Debugging Extensions",
  /*  3 */ "Page Size Extensions",
  /*  4 */ "Time Stamp Counter",
  /*  5 */ "Model Specific Registers",
  /*  6 */ "Physical Address Extension",
  /*  7 */ "Machine Check Exceptions",
  /*  8 */ "CMPXCHG8B Instruction",
  /*  9 */ "On-Chip APIC",
  /* 10 */ "",
  /* 11 */ "Fast System Call",
  /* 12 */ "Memory Type Range Registers",
  /* 13 */ "Page Global Enable",
  /* 14 */ "Machine Check Architecture",
  /* 15 */ "Conditional Mov Instruction",
  /* 16 */ "Page Attribute Table",
  /* 17 */ "36-bit Page Size Extension",
  /* 18 */ "Processor Serial Number",
  /* 19 */ "CLFLUSH Instruction",
  /* 20 */ "",
  /* 21 */ "Debug Trace Store feature",
  /* 22 */ "ACPI registers in MSR space",
  /* 23 */ "Intel Architecture MMX Technology",
  /* 24 */ "Fast Float Point Save and Restore",
  /* 25 */ "Streaming SIMD extensions",
  /* 26 */ "Streaming SIMD extensions 2",
  /* 27 */ "Self-Snoop",
  /* 28 */ "Hyper Threading",
  /* 29 */ "Thermal Monitor",
  /* 30 */ "",
  /* 31 */ "Pending Break Enable"
};
2311
// Names for the bits of _cpuid_info.ext_cpuid1_edx, indexed by bit position.
// cpu_write_support_string() skips entries that are empty strings.
const char* const _feature_extended_edx_id[] = {
  /*  0 - 10 */ "", "", "", "", "", "", "", "", "", "", "",
  /* 11 */ "SYSCALL/SYSRET",
  /* 12 - 19 */ "", "", "", "", "", "", "", "",
  /* 20 */ "Execute Disable Bit",
  /* 21 - 26 */ "", "", "", "", "", "",
  /* 27 */ "RDTSCP",
  /* 28 */ "",
  /* 29 */ "Intel 64 Architecture",
  /* 30 - 31 */ "", ""
};
2346
// Names for the bits of _cpuid_info.std_cpuid1_ecx, indexed by bit position.
// cpu_write_support_string() skips entries that are empty strings.
const char* const _feature_ecx_id[] = {
  /*  0 */ "Streaming SIMD Extensions 3",
  /*  1 */ "PCLMULQDQ",
  /*  2 */ "64-bit DS Area",
  /*  3 */ "MONITOR/MWAIT instructions",
  /*  4 */ "CPL Qualified Debug Store",
  /*  5 */ "Virtual Machine Extensions",
  /*  6 */ "Safer Mode Extensions",
  /*  7 */ "Enhanced Intel SpeedStep technology",
  /*  8 */ "Thermal Monitor 2",
  /*  9 */ "Supplemental Streaming SIMD Extensions 3",
  /* 10 */ "L1 Context ID",
  /* 11 */ "",
  /* 12 */ "Fused Multiply-Add",
  /* 13 */ "CMPXCHG16B",
  /* 14 */ "xTPR Update Control",
  /* 15 */ "Perfmon and Debug Capability",
  /* 16 */ "",
  /* 17 */ "Process-context identifiers",
  /* 18 */ "Direct Cache Access",
  /* 19 */ "Streaming SIMD extensions 4.1",
  /* 20 */ "Streaming SIMD extensions 4.2",
  /* 21 */ "x2APIC",
  /* 22 */ "MOVBE",
  /* 23 */ "Popcount instruction",
  /* 24 */ "TSC-Deadline",
  /* 25 */ "AESNI",
  /* 26 */ "XSAVE",
  /* 27 */ "OSXSAVE",
  /* 28 */ "AVX",
  /* 29 */ "F16C",
  /* 30 */ "RDRAND",
  /* 31 */ ""
};
2381
// Names for the bits of _cpuid_info.ext_cpuid1_ecx, indexed by bit position.
// cpu_write_support_string() skips entries that are empty strings.
const char* const _feature_extended_ecx_id[] = {
  /*  0 */ "LAHF/SAHF instruction support",
  /*  1 */ "Core multi-processor legacy mode",
  /*  2 -  4 */ "", "", "",
  /*  5 */ "Advanced Bit Manipulations: LZCNT",
  /*  6 */ "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  /*  7 */ "Misaligned SSE mode",
  /*  8 - 15 */ "", "", "", "", "", "", "", "",
  /* 16 - 23 */ "", "", "", "", "", "", "", "",
  /* 24 - 31 */ "", "", "", "", "", "", "", ""
};
2416
2417 const char* VM_Version::cpu_model_description(void) {
2418 uint32_t cpu_family = extended_cpu_family();
2419 uint32_t cpu_model = extended_cpu_model();
2420 const char* model = nullptr;
2421
2422 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2423 for (uint32_t i = 0; i <= cpu_model; i++) {
2424 model = _model_id_pentium_pro[i];
2425 if (model == nullptr) {
2426 break;
2427 }
2428 }
2429 }
2430 return model;
2431 }
2432
2433 const char* VM_Version::cpu_brand_string(void) {
2434 if (_cpu_brand_string == nullptr) {
2435 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2436 if (nullptr == _cpu_brand_string) {
2437 return nullptr;
2438 }
2439 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2440 if (ret_val != OS_OK) {
2441 FREE_C_HEAP_ARRAY(_cpu_brand_string);
2442 _cpu_brand_string = nullptr;
2443 }
2444 }
2445 return _cpu_brand_string;
2446 }
2447
2448 const char* VM_Version::cpu_brand(void) {
2449 const char* brand = nullptr;
2450
2451 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2452 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2453 brand = _brand_id[0];
2454 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2455 brand = _brand_id[i];
2456 }
2457 }
2458 return brand;
2459 }
2460
2461 bool VM_Version::cpu_is_em64t(void) {
2462 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2463 }
2464
2465 bool VM_Version::is_netburst(void) {
2466 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2467 }
2468
2469 bool VM_Version::supports_tscinv_ext(void) {
2470 if (!supports_tscinv_bit()) {
2471 return false;
2472 }
2473
2474 if (is_intel()) {
2475 return true;
2476 }
2477
2478 if (is_amd()) {
2479 return !is_amd_Barcelona();
2480 }
2481
2482 if (is_hygon()) {
2483 return true;
2484 }
2485
2486 return false;
2487 }
2488
2489 void VM_Version::resolve_cpu_information_details(void) {
2490
2491 // in future we want to base this information on proper cpu
2492 // and cache topology enumeration such as:
2493 // Intel 64 Architecture Processor Topology Enumeration
2494 // which supports system cpu and cache topology enumeration
2495 // either using 2xAPICIDs or initial APICIDs
2496
2497 // currently only rough cpu information estimates
2498 // which will not necessarily reflect the exact configuration of the system
2499
2500 // this is the number of logical hardware threads
2501 // visible to the operating system
2502 _no_of_threads = os::processor_count();
2503
2504 // find out number of threads per cpu package
2505 int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2506 if (threads_per_package == 0) {
2507 // Fallback code to avoid div by zero in subsequent code.
2508 // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2509 threads_per_package = threads_per_core() * cores_per_cpu();
2510 }
2511
2512 // use amount of threads visible to the process in order to guess number of sockets
2513 _no_of_sockets = _no_of_threads / threads_per_package;
2514
2515 // process might only see a subset of the total number of threads
2516 // from a single processor package. Virtualization/resource management for example.
2517 // If so then just write a hard 1 as num of pkgs.
2518 if (0 == _no_of_sockets) {
2519 _no_of_sockets = 1;
2520 }
2521
2522 // estimate the number of cores
2523 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2524 }
2525
2526
2527 const char* VM_Version::cpu_family_description(void) {
2528 int cpu_family_id = extended_cpu_family();
2529 if (is_amd()) {
2530 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2531 return _family_id_amd[cpu_family_id];
2532 }
2533 }
2534 if (is_intel()) {
2535 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2536 return cpu_model_description();
2537 }
2538 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2539 return _family_id_intel[cpu_family_id];
2540 }
2541 }
2542 if (is_zx()) {
2543 int cpu_model_id = extended_cpu_model();
2544 if (cpu_family_id == 7) {
2545 switch (cpu_model_id) {
2546 case 0x1B:
2547 return "wudaokou";
2548 case 0x3B:
2549 return "lujiazui";
2550 case 0x5B:
2551 return "yongfeng";
2552 case 0x6B:
2553 return "shijidadao";
2554 }
2555 } else if (cpu_family_id == 6) {
2556 return "zhangjiang";
2557 }
2558 }
2559 if (is_hygon()) {
2560 return "Dhyana";
2561 }
2562 return "Unknown x86";
2563 }
2564
2565 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2566 assert(buf != nullptr, "buffer is null!");
2567 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2568
2569 const char* cpu_type = nullptr;
2570 const char* x64 = nullptr;
2571
2572 if (is_intel()) {
2573 cpu_type = "Intel";
2574 x64 = cpu_is_em64t() ? " Intel64" : "";
2575 } else if (is_amd()) {
2576 cpu_type = "AMD";
2577 x64 = cpu_is_em64t() ? " AMD64" : "";
2578 } else if (is_zx()) {
2579 cpu_type = "Zhaoxin";
2580 x64 = cpu_is_em64t() ? " x86_64" : "";
2581 } else if (is_hygon()) {
2582 cpu_type = "Hygon";
2583 x64 = cpu_is_em64t() ? " AMD64" : "";
2584 } else {
2585 cpu_type = "Unknown x86";
2586 x64 = cpu_is_em64t() ? " x86_64" : "";
2587 }
2588
2589 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2590 cpu_type,
2591 cpu_family_description(),
2592 supports_ht() ? " (HT)" : "",
2593 supports_sse3() ? " SSE3" : "",
2594 supports_ssse3() ? " SSSE3" : "",
2595 supports_sse4_1() ? " SSE4.1" : "",
2596 supports_sse4_2() ? " SSE4.2" : "",
2597 supports_sse4a() ? " SSE4A" : "",
2598 is_netburst() ? " Netburst" : "",
2599 is_intel_family_core() ? " Core" : "",
2600 x64);
2601
2602 return OS_OK;
2603 }
2604
2605 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2606 assert(buf != nullptr, "buffer is null!");
2607 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2608 assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2609
2610 // invoke newly generated asm code to fetch CPU Brand String
2611 getCPUIDBrandString_stub(&_cpuid_info);
2612
2613 // fetch results into buffer
2614 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0;
2615 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1;
2616 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2;
2617 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2618 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2619 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2620 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2621 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2622 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2623 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2624 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2625 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2626
2627 return OS_OK;
2628 }
2629
2630 size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
2631 guarantee(buf != nullptr, "buffer is null!");
2632 guarantee(buf_len > 0, "buffer len not enough!");
2633
2634 unsigned int flag = 0;
2635 unsigned int fi = 0;
2636 size_t written = 0;
2637 const char* prefix = "";
2638
2639 #define WRITE_TO_BUF(string) \
2640 { \
2641 int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
2642 if (res < 0) { \
2643 return buf_len - 1; \
2644 } \
2645 written += res; \
2646 if (prefix[0] == '\0') { \
2647 prefix = ", "; \
2648 } \
2649 }
2650
2651 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2652 if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
2653 continue; /* no hyperthreading */
2654 } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
2655 continue; /* no fast system call */
2656 }
2657 if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
2658 WRITE_TO_BUF(_feature_edx_id[fi]);
2659 }
2660 }
2661
2662 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2663 if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
2664 WRITE_TO_BUF(_feature_ecx_id[fi]);
2665 }
2666 }
2667
2668 for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
2669 if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
2670 WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
2671 }
2672 }
2673
2674 for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
2675 if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
2676 WRITE_TO_BUF(_feature_extended_edx_id[fi]);
2677 }
2678 }
2679
2680 if (supports_tscinv_bit()) {
2681 WRITE_TO_BUF("Invariant TSC");
2682 }
2683
2684 if (supports_hybrid()) {
2685 WRITE_TO_BUF("Hybrid Architecture");
2686 }
2687
2688 return written;
2689 }
2690
2691 /**
2692 * Write a detailed description of the cpu to a given buffer, including
2693 * feature set.
2694 */
2695 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2696 assert(buf != nullptr, "buffer is null!");
2697 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2698
2699 static const char* unknown = "<unknown>";
2700 char vendor_id[VENDOR_LENGTH];
2701 const char* family = nullptr;
2702 const char* model = nullptr;
2703 const char* brand = nullptr;
2704 int outputLen = 0;
2705
2706 family = cpu_family_description();
2707 if (family == nullptr) {
2708 family = unknown;
2709 }
2710
2711 model = cpu_model_description();
2712 if (model == nullptr) {
2713 model = unknown;
2714 }
2715
2716 brand = cpu_brand_string();
2717
2718 if (brand == nullptr) {
2719 brand = cpu_brand();
2720 if (brand == nullptr) {
2721 brand = unknown;
2722 }
2723 }
2724
2725 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2726 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2727 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2728 vendor_id[VENDOR_LENGTH-1] = '\0';
2729
2730 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2731 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2732 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2733 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2734 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2735 "Supports: ",
2736 brand,
2737 vendor_id,
2738 family,
2739 extended_cpu_family(),
2740 model,
2741 extended_cpu_model(),
2742 cpu_stepping(),
2743 _cpuid_info.std_cpuid1_eax.bits.ext_family,
2744 _cpuid_info.std_cpuid1_eax.bits.ext_model,
2745 _cpuid_info.std_cpuid1_eax.bits.proc_type,
2746 _cpuid_info.std_cpuid1_eax.value,
2747 _cpuid_info.std_cpuid1_ebx.value,
2748 _cpuid_info.std_cpuid1_ecx.value,
2749 _cpuid_info.std_cpuid1_edx.value,
2750 _cpuid_info.ext_cpuid1_eax,
2751 _cpuid_info.ext_cpuid1_ebx,
2752 _cpuid_info.ext_cpuid1_ecx,
2753 _cpuid_info.ext_cpuid1_edx);
2754
2755 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2756 if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2757 return OS_ERR;
2758 }
2759
2760 cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2761
2762 return OS_OK;
2763 }
2764
2765
2766 // Fill in Abstract_VM_Version statics
2767 void VM_Version::initialize_cpu_information() {
2768 assert(_vm_version_initialized, "should have initialized VM_Version long ago");
2769 assert(!_initialized, "shouldn't be initialized yet");
2770 resolve_cpu_information_details();
2771
2772 // initialize cpu_name and cpu_desc
2773 cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
2774 cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
2775 _initialized = true;
2776 }
2777
2778 /**
2779 * For information about extracting the frequency from the cpu brand string, please see:
2780 *
2781 * Intel Processor Identification and the CPUID Instruction
2782 * Application Note 485
2783 * May 2012
2784 *
2785 * The return value is the frequency in Hz.
2786 */
2787 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2788 const char* const brand_string = cpu_brand_string();
2789 if (brand_string == nullptr) {
2790 return 0;
2791 }
2792 const int64_t MEGA = 1000000;
2793 int64_t multiplier = 0;
2794 int64_t frequency = 0;
2795 uint8_t idx = 0;
2796 // The brand string buffer is at most 48 bytes.
2797 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2798 for (; idx < 48-2; ++idx) {
2799 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2800 // Search brand string for "yHz" where y is M, G, or T.
2801 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2802 if (brand_string[idx] == 'M') {
2803 multiplier = MEGA;
2804 } else if (brand_string[idx] == 'G') {
2805 multiplier = MEGA * 1000;
2806 } else if (brand_string[idx] == 'T') {
2807 multiplier = MEGA * MEGA;
2808 }
2809 break;
2810 }
2811 }
2812 if (multiplier > 0) {
2813 // Compute frequency (in Hz) from brand string.
2814 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2815 frequency = (brand_string[idx-4] - '0') * multiplier;
2816 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2817 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2818 } else { // format is "xxxx"
2819 frequency = (brand_string[idx-4] - '0') * 1000;
2820 frequency += (brand_string[idx-3] - '0') * 100;
2821 frequency += (brand_string[idx-2] - '0') * 10;
2822 frequency += (brand_string[idx-1] - '0');
2823 frequency *= multiplier;
2824 }
2825 }
2826 return frequency;
2827 }
2828
2829
2830 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2831 if (_max_qualified_cpu_frequency == 0) {
2832 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2833 }
2834 return _max_qualified_cpu_frequency;
2835 }
2836
2837 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2838 VM_Features vm_features;
2839
2840 // check the features that must be present
2841 guarantee(std_cpuid1_edx.bits.sse2 != 0, "sse2 is not supported");
2842 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
2843 // clflush_size is size in quadwords (8 bytes).
2844 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == ICache::line_size/8, "clflush size is not supported");
2845
2846 if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2847 vm_features.set_feature(CPU_CX8);
2848 if (std_cpuid1_edx.bits.cmov != 0)
2849 vm_features.set_feature(CPU_CMOV);
2850 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2851 ext_cpuid1_edx.bits.fxsr != 0))
2852 vm_features.set_feature(CPU_FXSR);
2853 // HT flag is set for multi-core processors also.
2854 if (threads_per_core() > 1)
2855 vm_features.set_feature(CPU_HT);
2856 if (std_cpuid1_ecx.bits.sse3 != 0)
2857 vm_features.set_feature(CPU_SSE3);
2858 if (std_cpuid1_ecx.bits.ssse3 != 0)
2859 vm_features.set_feature(CPU_SSSE3);
2860 if (std_cpuid1_ecx.bits.sse4_1 != 0)
2861 vm_features.set_feature(CPU_SSE4_1);
2862 if (std_cpuid1_ecx.bits.sse4_2 != 0)
2863 vm_features.set_feature(CPU_SSE4_2);
2864 if (std_cpuid1_ecx.bits.popcnt != 0)
2865 vm_features.set_feature(CPU_POPCNT);
2866 if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2867 xem_xcr0_eax.bits.apx_f != 0 &&
2868 std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2869 vm_features.set_feature(CPU_APX_F);
2870 }
2871 if (std_cpuid1_ecx.bits.avx != 0 &&
2872 std_cpuid1_ecx.bits.osxsave != 0 &&
2873 xem_xcr0_eax.bits.sse != 0 &&
2874 xem_xcr0_eax.bits.ymm != 0) {
2875 vm_features.set_feature(CPU_AVX);
2876 vm_features.set_feature(CPU_VZEROUPPER);
2877 if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2878 vm_features.set_feature(CPU_SHA512);
2879 if (std_cpuid1_ecx.bits.f16c != 0)
2880 vm_features.set_feature(CPU_F16C);
2881 if (sef_cpuid7_ebx.bits.avx2 != 0) {
2882 vm_features.set_feature(CPU_AVX2);
2883 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2884 vm_features.set_feature(CPU_AVX_IFMA);
2885 }
2886 if (sef_cpuid7_ecx.bits.gfni != 0)
2887 vm_features.set_feature(CPU_GFNI);
2888 if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2889 xem_xcr0_eax.bits.opmask != 0 &&
2890 xem_xcr0_eax.bits.zmm512 != 0 &&
2891 xem_xcr0_eax.bits.zmm32 != 0) {
2892 vm_features.set_feature(CPU_AVX512F);
2893 if (sef_cpuid7_ebx.bits.avx512cd != 0)
2894 vm_features.set_feature(CPU_AVX512CD);
2895 if (sef_cpuid7_ebx.bits.avx512dq != 0)
2896 vm_features.set_feature(CPU_AVX512DQ);
2897 if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2898 vm_features.set_feature(CPU_AVX512_IFMA);
2899 if (sef_cpuid7_ebx.bits.avx512pf != 0)
2900 vm_features.set_feature(CPU_AVX512PF);
2901 if (sef_cpuid7_ebx.bits.avx512er != 0)
2902 vm_features.set_feature(CPU_AVX512ER);
2903 if (sef_cpuid7_ebx.bits.avx512bw != 0)
2904 vm_features.set_feature(CPU_AVX512BW);
2905 if (sef_cpuid7_ebx.bits.avx512vl != 0)
2906 vm_features.set_feature(CPU_AVX512VL);
2907 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2908 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2909 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2910 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2911 if (sef_cpuid7_ecx.bits.vaes != 0)
2912 vm_features.set_feature(CPU_AVX512_VAES);
2913 if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2914 vm_features.set_feature(CPU_AVX512_VNNI);
2915 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2916 vm_features.set_feature(CPU_AVX512_BITALG);
2917 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2918 vm_features.set_feature(CPU_AVX512_VBMI);
2919 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2920 vm_features.set_feature(CPU_AVX512_VBMI2);
2921 }
2922 if (is_intel()) {
2923 if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2924 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2925 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2926 xem_xcr0_eax.bits.opmask != 0 &&
2927 xem_xcr0_eax.bits.zmm512 != 0 &&
2928 xem_xcr0_eax.bits.zmm32 != 0) {
2929 vm_features.set_feature(CPU_AVX10_1);
2930 vm_features.set_feature(CPU_AVX512F);
2931 vm_features.set_feature(CPU_AVX512CD);
2932 vm_features.set_feature(CPU_AVX512DQ);
2933 vm_features.set_feature(CPU_AVX512PF);
2934 vm_features.set_feature(CPU_AVX512ER);
2935 vm_features.set_feature(CPU_AVX512BW);
2936 vm_features.set_feature(CPU_AVX512VL);
2937 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2938 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2939 vm_features.set_feature(CPU_AVX512_VAES);
2940 vm_features.set_feature(CPU_AVX512_VNNI);
2941 vm_features.set_feature(CPU_AVX512_BITALG);
2942 vm_features.set_feature(CPU_AVX512_VBMI);
2943 vm_features.set_feature(CPU_AVX512_VBMI2);
2944 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
2945 vm_features.set_feature(CPU_AVX10_2);
2946 }
2947 }
2948 }
2949 }
2950
2951 if (std_cpuid1_ecx.bits.hv != 0)
2952 vm_features.set_feature(CPU_HV);
2953 if (sef_cpuid7_ebx.bits.bmi1 != 0)
2954 vm_features.set_feature(CPU_BMI1);
2955 if (std_cpuid1_edx.bits.tsc != 0)
2956 vm_features.set_feature(CPU_TSC);
2957 if (ext_cpuid7_edx.bits.tsc_invariance != 0)
2958 vm_features.set_feature(CPU_TSCINV_BIT);
2959 if (std_cpuid1_ecx.bits.aes != 0)
2960 vm_features.set_feature(CPU_AES);
2961 if (ext_cpuid1_ecx.bits.lzcnt != 0)
2962 vm_features.set_feature(CPU_LZCNT);
2963 if (ext_cpuid1_ecx.bits.prefetchw != 0)
2964 vm_features.set_feature(CPU_3DNOW_PREFETCH);
2965 if (sef_cpuid7_ebx.bits.erms != 0)
2966 vm_features.set_feature(CPU_ERMS);
2967 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
2968 vm_features.set_feature(CPU_FSRM);
2969 if (std_cpuid1_ecx.bits.clmul != 0)
2970 vm_features.set_feature(CPU_CLMUL);
2971 if (sef_cpuid7_ebx.bits.rtm != 0)
2972 vm_features.set_feature(CPU_RTM);
2973 if (sef_cpuid7_ebx.bits.adx != 0)
2974 vm_features.set_feature(CPU_ADX);
2975 if (sef_cpuid7_ebx.bits.bmi2 != 0)
2976 vm_features.set_feature(CPU_BMI2);
2977 if (sef_cpuid7_ebx.bits.sha != 0)
2978 vm_features.set_feature(CPU_SHA);
2979 if (std_cpuid1_ecx.bits.fma != 0)
2980 vm_features.set_feature(CPU_FMA);
2981 if (sef_cpuid7_ebx.bits.clflushopt != 0)
2982 vm_features.set_feature(CPU_FLUSHOPT);
2983 if (sef_cpuid7_ebx.bits.clwb != 0)
2984 vm_features.set_feature(CPU_CLWB);
2985 if (ext_cpuid1_edx.bits.rdtscp != 0)
2986 vm_features.set_feature(CPU_RDTSCP);
2987 if (sef_cpuid7_ecx.bits.rdpid != 0)
2988 vm_features.set_feature(CPU_RDPID);
2989
2990 // AMD|Hygon additional features.
2991 if (is_amd_family()) {
2992 // PREFETCHW was checked above, check TDNOW here.
2993 if ((ext_cpuid1_edx.bits.tdnow != 0))
2994 vm_features.set_feature(CPU_3DNOW_PREFETCH);
2995 if (ext_cpuid1_ecx.bits.sse4a != 0)
2996 vm_features.set_feature(CPU_SSE4A);
2997 }
2998
2999 // Intel additional features.
3000 if (is_intel()) {
3001 if (sef_cpuid7_edx.bits.serialize != 0)
3002 vm_features.set_feature(CPU_SERIALIZE);
3003 if (sef_cpuid7_edx.bits.hybrid != 0)
3004 vm_features.set_feature(CPU_HYBRID);
3005 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3006 vm_features.set_feature(CPU_AVX512_FP16);
3007 }
3008
3009 // ZX additional features.
3010 if (is_zx()) {
3011 // We do not know if these are supported by ZX, so we cannot trust
3012 // common CPUID bit for them.
3013 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3014 vm_features.clear_feature(CPU_CLWB);
3015 }
3016
3017 // Protection key features.
3018 if (sef_cpuid7_ecx.bits.pku != 0) {
3019 vm_features.set_feature(CPU_PKU);
3020 }
3021 if (sef_cpuid7_ecx.bits.ospke != 0) {
3022 vm_features.set_feature(CPU_OSPKE);
3023 }
3024
3025 // Control flow enforcement (CET) features.
3026 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3027 vm_features.set_feature(CPU_CET_SS);
3028 }
3029 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3030 vm_features.set_feature(CPU_CET_IBT);
3031 }
3032
3033 // Composite features.
3034 if (supports_tscinv_bit() &&
3035 ((is_amd_family() && !is_amd_Barcelona()) ||
3036 is_intel_tsc_synched_at_init())) {
3037 vm_features.set_feature(CPU_TSCINV);
3038 }
3039 return vm_features;
3040 }
3041
3042 bool VM_Version::os_supports_avx_vectors() {
3043 bool retVal = false;
3044 int nreg = 4;
3045 if (supports_evex()) {
3046 // Verify that OS save/restore all bits of EVEX registers
3047 // during signal processing.
3048 retVal = true;
3049 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3050 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3051 retVal = false;
3052 break;
3053 }
3054 }
3055 } else if (supports_avx()) {
3056 // Verify that OS save/restore all bits of AVX registers
3057 // during signal processing.
3058 retVal = true;
3059 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3060 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3061 retVal = false;
3062 break;
3063 }
3064 }
3065 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3066 if (retVal == false) {
3067 // Verify that OS save/restore all bits of EVEX registers
3068 // during signal processing.
3069 retVal = true;
3070 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3071 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3072 retVal = false;
3073 break;
3074 }
3075 }
3076 }
3077 }
3078 return retVal;
3079 }
3080
3081 bool VM_Version::os_supports_apx_egprs() {
3082 if (!supports_apx_f()) {
3083 return false;
3084 }
3085 if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3086 _cpuid_info.apx_save[1] != egpr_test_value()) {
3087 return false;
3088 }
3089 return true;
3090 }
3091
3092 uint VM_Version::cores_per_cpu() {
3093 uint result = 1;
3094 if (is_intel()) {
3095 bool supports_topology = supports_processor_topology();
3096 if (supports_topology) {
3097 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3098 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3099 }
3100 if (!supports_topology || result == 0) {
3101 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3102 }
3103 } else if (is_amd_family()) {
3104 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3105 if (cpu_family() >= 0x17) { // Zen or later
3106 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3107 }
3108 } else if (is_zx()) {
3109 bool supports_topology = supports_processor_topology();
3110 if (supports_topology) {
3111 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3112 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3113 }
3114 if (!supports_topology || result == 0) {
3115 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3116 }
3117 }
3118 return result;
3119 }
3120
3121 uint VM_Version::threads_per_core() {
3122 uint result = 1;
3123 if (is_intel() && supports_processor_topology()) {
3124 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3125 } else if (is_zx() && supports_processor_topology()) {
3126 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3127 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3128 if (cpu_family() >= 0x17) {
3129 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3130 } else {
3131 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3132 cores_per_cpu();
3133 }
3134 }
3135 return (result == 0 ? 1 : result);
3136 }
3137
3138 uint VM_Version::L1_line_size() {
3139 uint result = 0;
3140 if (is_intel()) {
3141 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3142 } else if (is_amd_family()) {
3143 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3144 } else if (is_zx()) {
3145 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3146 }
3147 if (result < 32) // not defined ?
3148 result = 32; // 32 bytes by default on x86 and other x64
3149 return result;
3150 }
3151
3152 bool VM_Version::is_intel_tsc_synched_at_init() {
3153 if (is_intel_family_core()) {
3154 uint32_t ext_model = extended_cpu_model();
3155 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3156 ext_model == CPU_MODEL_WESTMERE_EP ||
3157 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3158 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3159 // <= 2-socket invariant tsc support. EX versions are usually used
3160 // in > 2-socket systems and likely don't synchronize tscs at
3161 // initialization.
3162 // Code that uses tsc values must be prepared for them to arbitrarily
3163 // jump forward or backward.
3164 return true;
3165 }
3166 }
3167 return false;
3168 }
3169
3170 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3171 // Hardware prefetching (distance/size in bytes):
3172 // Pentium 3 - 64 / 32
3173 // Pentium 4 - 256 / 128
3174 // Athlon - 64 / 32 ????
3175 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3176 // Core - 128 / 64
3177 //
3178 // Software prefetching (distance in bytes / instruction with best score):
3179 // Pentium 3 - 128 / prefetchnta
3180 // Pentium 4 - 512 / prefetchnta
3181 // Athlon - 128 / prefetchnta
3182 // Opteron - 256 / prefetchnta
3183 // Core - 256 / prefetchnta
3184 // It will be used only when AllocatePrefetchStyle > 0
3185
3186 if (is_amd_family()) { // AMD | Hygon
3187 return 256; // Opteron
3188 } else if (is_zx()) {
3189 return 256;
3190 } else { // Intel
3191 if (supports_sse3() && is_intel_server_family()) {
3192 if (is_intel_modern_cpu()) { // Nehalem based cpus
3193 return 192;
3194 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3195 return 384;
3196 }
3197 }
3198 if (is_intel_server_family()) {
3199 return 256; // Pentium M, Core, Core2
3200 } else {
3201 return 512; // Pentium 4
3202 }
3203 }
3204 }
3205
3206 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3207 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3208 switch (id) {
3209 case vmIntrinsics::_floatToFloat16:
3210 case vmIntrinsics::_float16ToFloat:
3211 if (!supports_float16()) {
3212 return false;
3213 }
3214 break;
3215 default:
3216 break;
3217 }
3218 return true;
3219 }
3220
3221 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3222 int i = 0;
3223 ss.join([&]() {
3224 const char* str = nullptr;
3225 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3226 if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3227 str = _features_names[i];
3228 }
3229 i += 1;
3230 }
3231 return str;
3232 }, ", ");
3233 }
3234
3235 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3236 VM_Features* features = (VM_Features*)features_buffer;
3237 insert_features_names(*features, ss);
3238 }
3239
3240 void VM_Version::get_missing_features_name(void* features_set1, void* features_set2, stringStream& ss) {
3241 VM_Features* vm_features_set1 = (VM_Features*)features_set1;
3242 VM_Features* vm_features_set2 = (VM_Features*)features_set2;
3243 int i = 0;
3244 ss.join([&]() {
3245 const char* str = nullptr;
3246 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3247 Feature_Flag flag = (Feature_Flag)i;
3248 if (vm_features_set1->supports_feature(flag) && !vm_features_set2->supports_feature(flag)) {
3249 str = _features_names[i];
3250 }
3251 i += 1;
3252 }
3253 return str;
3254 }, ", ");
3255 }
3256
3257 int VM_Version::cpu_features_size() {
3258 return sizeof(VM_Features);
3259 }
3260
3261 void VM_Version::store_cpu_features(void* buf) {
3262 VM_Features copy = _features.aot_code_cache_features();
3263 memcpy(buf, ©, sizeof(VM_Features));
3264 }
3265
3266 bool VM_Version::verify_aot_code_cache_features(void* features_buffer) {
3267 VM_Features* features_to_test = (VM_Features*)features_buffer;
3268 VM_Features rt_features = _features.aot_code_cache_features();
3269 return rt_features.verify_aot_code_cache_features(features_to_test);
3270 }