1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/codeBlob.hpp"
29 #include "compiler/compilerDefinitions.inline.hpp"
30 #include "jvm.h"
31 #include "logging/log.hpp"
32 #include "logging/logStream.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "runtime/globals_extension.hpp"
36 #include "runtime/java.hpp"
37 #include "runtime/os.inline.hpp"
38 #include "runtime/stubCodeGenerator.hpp"
39 #include "runtime/vm_version.hpp"
40 #include "utilities/checkedCast.hpp"
41 #include "utilities/ostream.hpp"
42 #include "utilities/powerOfTwo.hpp"
43 #include "utilities/virtualizationSupport.hpp"
44
// Decoded processor identity (filled in by get_processor_features()).
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
// Whether this CPU is subject to the Intel JCC erratum.
bool VM_Version::_has_intel_jcc_erratum;
// Raw output of the CPUID probing stub; zero-initialized until the stub runs.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into a printable name for each feature bit.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) XSTR(name),
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Blob holding the generated CPU-probing stubs, and its capacity in bytes.
static BufferBlob* stub_blob;
static const int stub_size = 2550;

// Size of the feature bitmap, in 64-bit words.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

// _features may be adjusted by VM settings; _cpu_features preserves the raw
// hardware feature set (see get_processor_features()).
VM_Version::VM_Features VM_Version::_features;
VM_Version::VM_Features VM_Version::_cpu_features;

// C-linkage signatures of the stubs generated by VM_Version_StubGenerator below.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}
// Entry points into stub_blob, set once the stubs have been generated.
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;
82
83 bool VM_Version::supports_clflush() {
84 // clflush should always be available on x86_64
85 // if not we are in real trouble because we rely on it
86 // to flush the code cache.
87 // Unfortunately, Assembler::clflush is currently called as part
88 // of generation of the code cache flush routine. This happens
89 // under Universe::init before the processor features are set
90 // up. Assembler::flush calls this routine to check that clflush
91 // is allowed. So, we give the caller a free pass if Universe init
92 // is still in progress.
93 assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
94 return true;
95 }
96
// CPUID leaf numbers (EAX input values) used when querying the processor.
// Standard leaves:
#define CPUID_STANDARD_FN 0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

// Extended leaves (0x80000000 reports the highest supported extended leaf;
// 0x80000002..0x80000004 hold the processor brand string).
#define CPUID_EXTENDED_FN 0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
109
// Generates the CPU-probing stubs that run very early during VM startup:
// a raw CPUID dumper (get_cpu_info_stub), a virtualization-leaf reader
// (detect_virt_stub), an APX test-state clearer, and a brand-string reader.
// The emitted code is placed in stub_blob and invoked through the function
// pointers declared above.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Emits a stub that zeroes the APX extended GPRs r16 and r31. Called from
  // signal handling so that a later non-zero read of these registers proves
  // the OS genuinely restored them (see the APX save/restore probe below).
  address clear_apx_test_state() {
#   define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    // Temporarily force UseAPX on so the assembler accepts EGPR operands.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Emits get_cpu_info_stub: fills a VM_Version::CpuidInfo with raw CPUID
  // leaf data, reads XCR0 via xgetbv, and deliberately triggers SEGVs
  // (loads through a null pointer) so the signal handler can verify that the
  // OS restores extended register state (YMM/ZMM, and APX EGPRs) across
  // signal handling. The argument (c_rarg0) is the CpuidInfo to fill.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa);                  // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx);   // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1);     // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2);     // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid topology level
    __ orl(rax, rbx);    // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f);  // Determine if valid cache parameters used
    __ orl(rax, rax);    // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000);     // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004);     // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006);     // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007);     // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008);     // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E);     // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX probe: load a known test value into r16/r31, take a SEGV, and save
    // the registers afterwards so the handler's effect can be checked.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654);              // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };

  // Emits vzeroupper to clear upper YMM state before returning to the caller.
  // Jumps straight to L_wrapup on non-Intel CPUs ('Genu' vendor check fails)
  // and on Intel Xeon Phi 3200/5200/7200 or future Xeon Phi models.
  void generate_vzeroupper(Label& L_wrapup) {
#   define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547);  // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670);              // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650);              // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
#   undef __
  }

  // Emits detect_virt_stub: executes CPUID for the requested leaf (c_rarg0)
  // and stores eax/ebx/ecx/edx into the caller-supplied uint32_t[4] array
  // (c_rarg1).
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
#   define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };


  // Emits getCPUIDBrandString_stub: reads the 48-byte processor brand string
  // via extended CPUID leaves 0x80000002..0x80000004 into the CpuidInfo
  // proc_name_0..proc_name_11 fields (argument c_rarg0). On pre-CPUID
  // hardware (386/486 detected via EFLAGS AC/ID probing) it returns without
  // filling anything in.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
#   define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486);   // if cpuid doesn't support an input
                                        // value of at least 1, we give up and
                                        // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002)  // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

#   undef __

    return start;
  };
};
860
861 void VM_Version::get_processor_features() {
862
863 _cpu = 4; // 486 by default
864 _model = 0;
865 _stepping = 0;
866 _logical_processors_per_package = 1;
867 // i486 internal cache is both I&D and has a 16-byte line size
868 _L1_data_cache_line_size = 16;
869
870 // Get raw processor info
871
872 get_cpu_info_stub(&_cpuid_info);
873
874 assert_is_initialized();
875 _cpu = extended_cpu_family();
876 _model = extended_cpu_model();
877 _stepping = cpu_stepping();
878
879 if (cpu_family() > 4) { // it supports CPUID
880 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
881 _cpu_features = _features; // Preserve features
882 // Logical processors are only available on P4s and above,
883 // and only if hyperthreading is available.
884 _logical_processors_per_package = logical_processor_count();
885 _L1_data_cache_line_size = L1_line_size();
886 }
887
888 // xchg and xadd instructions
889 _supports_atomic_getset4 = true;
890 _supports_atomic_getadd4 = true;
891 _supports_atomic_getset8 = true;
892 _supports_atomic_getadd8 = true;
893
894 // OS should support SSE for x64 and hardware should support at least SSE2.
895 if (!VM_Version::supports_sse2()) {
896 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
897 }
898 // in 64 bit the use of SSE2 is the minimum
899 if (UseSSE < 2) UseSSE = 2;
900
901 // flush_icache_stub have to be generated first.
902 // That is why Icache line size is hard coded in ICache class,
903 // see icache_x86.hpp. It is also the reason why we can't use
904 // clflush instruction in 32-bit VM since it could be running
905 // on CPU which does not support it.
906 //
907 // The only thing we can do is to verify that flushed
908 // ICache::line_size has correct value.
909 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
910 // clflush_size is size in quadwords (8 bytes).
911 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
912
913 // assigning this field effectively enables Unsafe.writebackMemory()
914 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
915 // that is only implemented on x86_64 and only if the OS plays ball
916 if (os::supports_map_sync()) {
917 // publish data cache line flush size to generic field, otherwise
918 // let if default to zero thereby disabling writeback
919 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
920 }
921
922 // Check if processor has Intel Ecore
923 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
924 (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
925 _model == 0xCC || _model == 0xDD)) {
926 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
927 }
928
929 if (UseSSE < 4) {
930 _features.clear_feature(CPU_SSE4_1);
931 _features.clear_feature(CPU_SSE4_2);
932 }
933
934 if (UseSSE < 3) {
935 _features.clear_feature(CPU_SSE3);
936 _features.clear_feature(CPU_SSSE3);
937 _features.clear_feature(CPU_SSE4A);
938 }
939
940 if (UseSSE < 2)
941 _features.clear_feature(CPU_SSE2);
942
943 if (UseSSE < 1)
944 _features.clear_feature(CPU_SSE);
945
  // Since AVX instructions are slower than SSE on some ZX cpus, force UseAVX=0.
947 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
948 UseAVX = 0;
949 }
950
951 // UseSSE is set to the smaller of what hardware supports and what
952 // the command line requires. I.e., you cannot set UseSSE to 2 on
953 // older Pentiums which do not support it.
954 int use_sse_limit = 0;
955 if (UseSSE > 0) {
956 if (UseSSE > 3 && supports_sse4_1()) {
957 use_sse_limit = 4;
958 } else if (UseSSE > 2 && supports_sse3()) {
959 use_sse_limit = 3;
960 } else if (UseSSE > 1 && supports_sse2()) {
961 use_sse_limit = 2;
962 } else if (UseSSE > 0 && supports_sse()) {
963 use_sse_limit = 1;
964 } else {
965 use_sse_limit = 0;
966 }
967 }
968 if (FLAG_IS_DEFAULT(UseSSE)) {
969 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
970 } else if (UseSSE > use_sse_limit) {
971 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
972 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
973 }
974
975 // first try initial setting and detect what we can support
976 int use_avx_limit = 0;
977 if (UseAVX > 0) {
978 if (UseSSE < 4) {
979 // Don't use AVX if SSE is unavailable or has been disabled.
980 use_avx_limit = 0;
981 } else if (UseAVX > 2 && supports_evex()) {
982 use_avx_limit = 3;
983 } else if (UseAVX > 1 && supports_avx2()) {
984 use_avx_limit = 2;
985 } else if (UseAVX > 0 && supports_avx()) {
986 use_avx_limit = 1;
987 } else {
988 use_avx_limit = 0;
989 }
990 }
991 if (FLAG_IS_DEFAULT(UseAVX)) {
992 // Don't use AVX-512 on older Skylakes unless explicitly requested.
993 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
994 FLAG_SET_DEFAULT(UseAVX, 2);
995 } else {
996 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
997 }
998 }
999
1000 if (UseAVX > use_avx_limit) {
1001 if (UseSSE < 4) {
1002 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1003 } else {
1004 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1005 }
1006 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1007 }
1008
1009 if (UseAVX < 3) {
1010 _features.clear_feature(CPU_AVX512F);
1011 _features.clear_feature(CPU_AVX512DQ);
1012 _features.clear_feature(CPU_AVX512CD);
1013 _features.clear_feature(CPU_AVX512BW);
1014 _features.clear_feature(CPU_AVX512ER);
1015 _features.clear_feature(CPU_AVX512PF);
1016 _features.clear_feature(CPU_AVX512VL);
1017 _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1018 _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1019 _features.clear_feature(CPU_AVX512_VAES);
1020 _features.clear_feature(CPU_AVX512_VNNI);
1021 _features.clear_feature(CPU_AVX512_VBMI);
1022 _features.clear_feature(CPU_AVX512_VBMI2);
1023 _features.clear_feature(CPU_AVX512_BITALG);
1024 _features.clear_feature(CPU_AVX512_IFMA);
1025 _features.clear_feature(CPU_APX_F);
1026 _features.clear_feature(CPU_AVX512_FP16);
1027 _features.clear_feature(CPU_AVX10_1);
1028 _features.clear_feature(CPU_AVX10_2);
1029 }
1030
1031
1032 if (UseAVX < 2) {
1033 _features.clear_feature(CPU_AVX2);
1034 _features.clear_feature(CPU_AVX_IFMA);
1035 }
1036
1037 if (UseAVX < 1) {
1038 _features.clear_feature(CPU_AVX);
1039 _features.clear_feature(CPU_VZEROUPPER);
1040 _features.clear_feature(CPU_F16C);
1041 _features.clear_feature(CPU_SHA512);
1042 }
1043
1044 if (logical_processors_per_package() == 1) {
1045 // HT processor could be installed on a system which doesn't support HT.
1046 _features.clear_feature(CPU_HT);
1047 }
1048
1049 if (is_intel()) { // Intel cpus specific settings
1050 if (is_knights_family()) {
1051 _features.clear_feature(CPU_VZEROUPPER);
1052 _features.clear_feature(CPU_AVX512BW);
1053 _features.clear_feature(CPU_AVX512VL);
1054 _features.clear_feature(CPU_APX_F);
1055 _features.clear_feature(CPU_AVX512DQ);
1056 _features.clear_feature(CPU_AVX512_VNNI);
1057 _features.clear_feature(CPU_AVX512_VAES);
1058 _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1059 _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1060 _features.clear_feature(CPU_AVX512_VBMI);
1061 _features.clear_feature(CPU_AVX512_VBMI2);
1062 _features.clear_feature(CPU_CLWB);
1063 _features.clear_feature(CPU_FLUSHOPT);
1064 _features.clear_feature(CPU_GFNI);
1065 _features.clear_feature(CPU_AVX512_BITALG);
1066 _features.clear_feature(CPU_AVX512_IFMA);
1067 _features.clear_feature(CPU_AVX_IFMA);
1068 _features.clear_feature(CPU_AVX512_FP16);
1069 _features.clear_feature(CPU_AVX10_1);
1070 _features.clear_feature(CPU_AVX10_2);
1071 }
1072 }
1073
1074 // Currently APX support is only enabled for targets supporting AVX512VL feature.
1075 bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1076 if (UseAPX && !apx_supported) {
1077 warning("UseAPX is not supported on this CPU, setting it to false");
1078 FLAG_SET_DEFAULT(UseAPX, false);
1079 }
1080
1081 if (!UseAPX) {
1082 _features.clear_feature(CPU_APX_F);
1083 }
1084
1085 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1086 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1087 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1088 } else {
1089 _has_intel_jcc_erratum = IntelJccErratumMitigation;
1090 }
1091
1092 assert(supports_clflush(), "Always present");
1093 if (X86ICacheSync == -1) {
1094 // Auto-detect, choosing the best performant one that still flushes
1095 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1096 if (supports_clwb()) {
1097 FLAG_SET_ERGO(X86ICacheSync, 3);
1098 } else if (supports_clflushopt()) {
1099 FLAG_SET_ERGO(X86ICacheSync, 2);
1100 } else {
1101 FLAG_SET_ERGO(X86ICacheSync, 1);
1102 }
1103 } else {
1104 if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1105 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1106 }
1107 if ((X86ICacheSync == 3) && !supports_clwb()) {
1108 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1109 }
1110 if ((X86ICacheSync == 5) && !supports_serialize()) {
1111 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1112 }
1113 }
1114
1115 stringStream ss(2048);
1116 if (supports_hybrid()) {
1117 ss.print("(hybrid)");
1118 } else {
1119 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1120 }
1121 ss.print(" family %d model %d stepping %d microcode 0x%x",
1122 cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1123 ss.print(", ");
1124 int features_offset = (int)ss.size();
1125 insert_features_names(_features, ss);
1126
1127 _cpu_info_string = ss.as_string(true);
1128 _features_string = _cpu_info_string + features_offset;
1129
1130 // Use AES instructions if available.
1131 if (supports_aes()) {
1132 if (FLAG_IS_DEFAULT(UseAES)) {
1133 FLAG_SET_DEFAULT(UseAES, true);
1134 }
1135 if (!UseAES) {
1136 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1137 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1138 }
1139 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1140 } else {
1141 if (UseSSE > 2) {
1142 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1143 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1144 }
1145 } else {
1146 // The AES intrinsic stubs require AES instruction support (of course)
        // but also require sse3 mode or higher for instructions it uses.
1148 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1149 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1150 }
1151 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1152 }
1153
1154 // --AES-CTR begins--
1155 if (!UseAESIntrinsics) {
1156 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1157 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1158 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1159 }
1160 } else {
1161 if (supports_sse4_1()) {
1162 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1163 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1164 }
1165 } else {
1166 // The AES-CTR intrinsic stubs require AES instruction support (of course)
          // but also require sse4.1 mode or higher for instructions it uses.
1168 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1169 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1170 }
1171 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1172 }
1173 }
1174 // --AES-CTR ends--
1175 }
1176 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1177 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1178 warning("AES instructions are not available on this CPU");
1179 FLAG_SET_DEFAULT(UseAES, false);
1180 }
1181 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1182 warning("AES intrinsics are not available on this CPU");
1183 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1184 }
1185 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1186 warning("AES-CTR intrinsics are not available on this CPU");
1187 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1188 }
1189 }
1190
1191 // Use CLMUL instructions if available.
1192 if (supports_clmul()) {
1193 if (FLAG_IS_DEFAULT(UseCLMUL)) {
1194 UseCLMUL = true;
1195 }
1196 } else if (UseCLMUL) {
1197 if (!FLAG_IS_DEFAULT(UseCLMUL))
1198 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1199 FLAG_SET_DEFAULT(UseCLMUL, false);
1200 }
1201
1202 if (UseCLMUL && (UseSSE > 2)) {
1203 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1204 UseCRC32Intrinsics = true;
1205 }
1206 } else if (UseCRC32Intrinsics) {
1207 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1208 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1209 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1210 }
1211
1212 if (supports_avx2()) {
1213 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1214 UseAdler32Intrinsics = true;
1215 }
1216 } else if (UseAdler32Intrinsics) {
1217 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1218 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1219 }
1220 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1221 }
1222
1223 if (supports_sse4_2() && supports_clmul()) {
1224 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1225 UseCRC32CIntrinsics = true;
1226 }
1227 } else if (UseCRC32CIntrinsics) {
1228 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1229 warning("CRC32C intrinsics are not available on this CPU");
1230 }
1231 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1232 }
1233
1234 // GHASH/GCM intrinsics
1235 if (UseCLMUL && (UseSSE > 2)) {
1236 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1237 UseGHASHIntrinsics = true;
1238 }
1239 } else if (UseGHASHIntrinsics) {
1240 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1241 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1242 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1243 }
1244
1245 // ChaCha20 Intrinsics
1246 // As long as the system supports AVX as a baseline we can do a
1247 // SIMD-enabled block function. StubGenerator makes the determination
1248 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1249 // version.
1250 if (UseAVX >= 1) {
1251 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1252 UseChaCha20Intrinsics = true;
1253 }
1254 } else if (UseChaCha20Intrinsics) {
1255 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1256 warning("ChaCha20 intrinsic requires AVX instructions");
1257 }
1258 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1259 }
1260
1261 // Kyber Intrinsics
1262 // Currently we only have them for AVX512
1263 #ifdef _LP64
1264 if (supports_evex() && supports_avx512bw()) {
1265 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1266 UseKyberIntrinsics = true;
1267 }
1268 } else
1269 #endif
1270 if (UseKyberIntrinsics) {
1271 warning("Intrinsics for ML-KEM are not available on this CPU.");
1272 FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1273 }
1274
1275 // Dilithium Intrinsics
1276 // Currently we only have them for AVX512
1277 if (supports_evex() && supports_avx512bw()) {
1278 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1279 UseDilithiumIntrinsics = true;
1280 }
1281 } else if (UseDilithiumIntrinsics) {
1282 warning("Intrinsics for ML-DSA are not available on this CPU.");
1283 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1284 }
1285
1286 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1287 if (UseAVX >= 2) {
1288 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1289 UseBASE64Intrinsics = true;
1290 }
1291 } else if (UseBASE64Intrinsics) {
1292 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1293 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1294 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1295 }
1296
1297 if (supports_fma()) {
1298 if (FLAG_IS_DEFAULT(UseFMA)) {
1299 UseFMA = true;
1300 }
1301 } else if (UseFMA) {
1302 warning("FMA instructions are not available on this CPU");
1303 FLAG_SET_DEFAULT(UseFMA, false);
1304 }
1305
1306 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1307 UseMD5Intrinsics = true;
1308 }
1309
1310 if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1311 if (FLAG_IS_DEFAULT(UseSHA)) {
1312 UseSHA = true;
1313 }
1314 } else if (UseSHA) {
1315 warning("SHA instructions are not available on this CPU");
1316 FLAG_SET_DEFAULT(UseSHA, false);
1317 }
1318
1319 if (supports_sha() && supports_sse4_1() && UseSHA) {
1320 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1321 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1322 }
1323 } else if (UseSHA1Intrinsics) {
1324 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1325 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1326 }
1327
1328 if (supports_sse4_1() && UseSHA) {
1329 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1330 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1331 }
1332 } else if (UseSHA256Intrinsics) {
1333 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1334 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1335 }
1336
1337 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1338 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1339 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1340 }
1341 } else if (UseSHA512Intrinsics) {
1342 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1343 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1344 }
1345
1346 if (supports_evex() && supports_avx512bw()) {
1347 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1348 UseSHA3Intrinsics = true;
1349 }
1350 } else if (UseSHA3Intrinsics) {
1351 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1352 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1353 }
1354
1355 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1356 FLAG_SET_DEFAULT(UseSHA, false);
1357 }
1358
1359 #if COMPILER2_OR_JVMCI
1360 int max_vector_size = 0;
1361 if (UseAVX == 0 || !os_supports_avx_vectors()) {
1362 // 16 byte vectors (in XMM) are supported with SSE2+
1363 max_vector_size = 16;
1364 } else if (UseAVX == 1 || UseAVX == 2) {
1365 // 32 bytes vectors (in YMM) are only supported with AVX+
1366 max_vector_size = 32;
1367 } else if (UseAVX > 2) {
1368 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1369 max_vector_size = 64;
1370 }
1371
1372 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1373
1374 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1375 if (MaxVectorSize < min_vector_size) {
1376 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1377 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1378 }
1379 if (MaxVectorSize > max_vector_size) {
1380 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1381 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1382 }
1383 if (!is_power_of_2(MaxVectorSize)) {
1384 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1385 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1386 }
1387 } else {
1388 // If default, use highest supported configuration
1389 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1390 }
1391
1392 #if defined(COMPILER2) && defined(ASSERT)
1393 if (MaxVectorSize > 0) {
1394 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1395 tty->print_cr("State of YMM registers after signal handle:");
1396 int nreg = 4;
1397 const char* ymm_name[4] = {"0", "7", "8", "15"};
1398 for (int i = 0; i < nreg; i++) {
1399 tty->print("YMM%s:", ymm_name[i]);
1400 for (int j = 7; j >=0; j--) {
1401 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1402 }
1403 tty->cr();
1404 }
1405 }
1406 }
1407 #endif // COMPILER2 && ASSERT
1408
1409 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1410 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1411 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1412 }
1413 } else if (UsePoly1305Intrinsics) {
1414 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1415 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1416 }
1417
1418 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1419 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1420 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1421 }
1422 } else if (UseIntPolyIntrinsics) {
1423 warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1424 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1425 }
1426
1427 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1428 UseMultiplyToLenIntrinsic = true;
1429 }
1430 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1431 UseSquareToLenIntrinsic = true;
1432 }
1433 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1434 UseMulAddIntrinsic = true;
1435 }
1436 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1437 UseMontgomeryMultiplyIntrinsic = true;
1438 }
1439 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1440 UseMontgomerySquareIntrinsic = true;
1441 }
1442 #endif // COMPILER2_OR_JVMCI
1443
1444 // On new cpus instructions which update whole XMM register should be used
1445 // to prevent partial register stall due to dependencies on high half.
1446 //
1447 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1448 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1449 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1450 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1451
1452
1453 if (is_zx()) { // ZX cpus specific settings
1454 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1455 UseStoreImmI16 = false; // don't use it on ZX cpus
1456 }
1457 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1458 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1459 // Use it on all ZX cpus
1460 UseAddressNop = true;
1461 }
1462 }
1463 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1464 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1465 }
1466 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1467 if (supports_sse3()) {
1468 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1469 } else {
1470 UseXmmRegToRegMoveAll = false;
1471 }
1472 }
1473 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1474 #ifdef COMPILER2
1475 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1476 // For new ZX cpus do the next optimization:
1477 // don't align the beginning of a loop if there are enough instructions
1478 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1479 // in current fetch line (OptoLoopAlignment) or the padding
1480 // is big (> MaxLoopPad).
1481 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1482 // generated NOP instructions. 11 is the largest size of one
1483 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1484 MaxLoopPad = 11;
1485 }
1486 #endif // COMPILER2
1487 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1488 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1489 }
1490 if (supports_sse4_2()) { // new ZX cpus
1491 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1492 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1493 }
1494 }
1495 }
1496
1497 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1498 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1499 }
1500 }
1501
1502 if (is_amd_family()) { // AMD cpus specific settings
1503 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1504 // Use it on new AMD cpus starting from Opteron.
1505 UseAddressNop = true;
1506 }
1507 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1508 // Use it on new AMD cpus starting from Opteron.
1509 UseNewLongLShift = true;
1510 }
1511 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1512 if (supports_sse4a()) {
1513 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1514 } else {
1515 UseXmmLoadAndClearUpper = false;
1516 }
1517 }
1518 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1519 if (supports_sse4a()) {
1520 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1521 } else {
1522 UseXmmRegToRegMoveAll = false;
1523 }
1524 }
1525 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1526 if (supports_sse4a()) {
1527 UseXmmI2F = true;
1528 } else {
1529 UseXmmI2F = false;
1530 }
1531 }
1532 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1533 if (supports_sse4a()) {
1534 UseXmmI2D = true;
1535 } else {
1536 UseXmmI2D = false;
1537 }
1538 }
1539
1540 // some defaults for AMD family 15h
1541 if (cpu_family() == 0x15) {
1542 // On family 15h processors default is no sw prefetch
1543 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1544 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1545 }
1546 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1547 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1548 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1549 }
1550 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1551 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1552 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1553 }
1554 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1555 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1556 }
1557 }
1558
1559 #ifdef COMPILER2
1560 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1561 // Limit vectors size to 16 bytes on AMD cpus < 17h.
1562 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1563 }
1564 #endif // COMPILER2
1565
1566 // Some defaults for AMD family >= 17h && Hygon family 18h
1567 if (cpu_family() >= 0x17) {
1568 // On family >=17h processors use XMM and UnalignedLoadStores
1569 // for Array Copy
1570 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1571 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1572 }
1573 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1574 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1575 }
1576 #ifdef COMPILER2
1577 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1578 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1579 }
1580 #endif
1581 }
1582 }
1583
1584 if (is_intel()) { // Intel cpus specific settings
1585 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1586 UseStoreImmI16 = false; // don't use it on Intel cpus
1587 }
1588 if (is_intel_server_family() || cpu_family() == 15) {
1589 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1590 // Use it on all Intel cpus starting from PentiumPro
1591 UseAddressNop = true;
1592 }
1593 }
1594 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1595 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1596 }
1597 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1598 if (supports_sse3()) {
1599 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1600 } else {
1601 UseXmmRegToRegMoveAll = false;
1602 }
1603 }
1604 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1605 #ifdef COMPILER2
1606 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1607 // For new Intel cpus do the next optimization:
1608 // don't align the beginning of a loop if there are enough instructions
1609 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1610 // in current fetch line (OptoLoopAlignment) or the padding
1611 // is big (> MaxLoopPad).
1612 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1613 // generated NOP instructions. 11 is the largest size of one
1614 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1615 MaxLoopPad = 11;
1616 }
1617 #endif // COMPILER2
1618
1619 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1620 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1621 }
1622 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1623 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1624 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1625 }
1626 }
1627 }
1628 if (is_atom_family() || is_knights_family()) {
1629 #ifdef COMPILER2
1630 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1631 OptoScheduling = true;
1632 }
1633 #endif
1634 if (supports_sse4_2()) { // Silvermont
1635 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1636 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1637 }
1638 }
1639 if (FLAG_IS_DEFAULT(UseIncDec)) {
1640 FLAG_SET_DEFAULT(UseIncDec, false);
1641 }
1642 }
1643 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1644 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1645 }
1646 #ifdef COMPILER2
1647 if (UseAVX > 2) {
1648 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1649 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1650 ArrayOperationPartialInlineSize != 0 &&
1651 ArrayOperationPartialInlineSize != 16 &&
1652 ArrayOperationPartialInlineSize != 32 &&
1653 ArrayOperationPartialInlineSize != 64)) {
1654 int inline_size = 0;
1655 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1656 inline_size = 64;
1657 } else if (MaxVectorSize >= 32) {
1658 inline_size = 32;
1659 } else if (MaxVectorSize >= 16) {
1660 inline_size = 16;
1661 }
1662 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1663 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1664 }
1665 ArrayOperationPartialInlineSize = inline_size;
1666 }
1667
1668 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1669 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1670 if (ArrayOperationPartialInlineSize) {
1671 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1672 } else {
1673 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1674 }
1675 }
1676 }
1677 #endif
1678 }
1679
1680 #ifdef COMPILER2
1681 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1682 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1683 OptimizeFill = false;
1684 }
1685 }
1686 #endif
1687 if (supports_sse4_2()) {
1688 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1689 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1690 }
1691 } else {
1692 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1693 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1694 }
1695 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1696 }
1697 if (UseSSE42Intrinsics) {
1698 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1699 UseVectorizedMismatchIntrinsic = true;
1700 }
1701 } else if (UseVectorizedMismatchIntrinsic) {
1702 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1703 warning("vectorizedMismatch intrinsics are not available on this CPU");
1704 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1705 }
1706 if (UseAVX >= 2) {
1707 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1708 } else if (UseVectorizedHashCodeIntrinsic) {
1709 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1710 warning("vectorizedHashCode intrinsics are not available on this CPU");
1711 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1712 }
1713
1714 // Use count leading zeros count instruction if available.
1715 if (supports_lzcnt()) {
1716 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1717 UseCountLeadingZerosInstruction = true;
1718 }
1719 } else if (UseCountLeadingZerosInstruction) {
1720 warning("lzcnt instruction is not available on this CPU");
1721 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1722 }
1723
1724 // Use count trailing zeros instruction if available
1725 if (supports_bmi1()) {
1726 // tzcnt does not require VEX prefix
1727 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1728 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1729 // Don't use tzcnt if BMI1 is switched off on command line.
1730 UseCountTrailingZerosInstruction = false;
1731 } else {
1732 UseCountTrailingZerosInstruction = true;
1733 }
1734 }
1735 } else if (UseCountTrailingZerosInstruction) {
1736 warning("tzcnt instruction is not available on this CPU");
1737 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1738 }
1739
1740 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1741 // VEX prefix is generated only when AVX > 0.
1742 if (supports_bmi1() && supports_avx()) {
1743 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1744 UseBMI1Instructions = true;
1745 }
1746 } else if (UseBMI1Instructions) {
1747 warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1748 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1749 }
1750
1751 if (supports_bmi2() && supports_avx()) {
1752 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1753 UseBMI2Instructions = true;
1754 }
1755 } else if (UseBMI2Instructions) {
1756 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1757 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1758 }
1759
1760 // Use population count instruction if available.
1761 if (supports_popcnt()) {
1762 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1763 UsePopCountInstruction = true;
1764 }
1765 } else if (UsePopCountInstruction) {
1766 warning("POPCNT instruction is not available on this CPU");
1767 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1768 }
1769
1770 // Use fast-string operations if available.
1771 if (supports_erms()) {
1772 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1773 UseFastStosb = true;
1774 }
1775 } else if (UseFastStosb) {
1776 warning("fast-string operations are not available on this CPU");
1777 FLAG_SET_DEFAULT(UseFastStosb, false);
1778 }
1779
1780 // For AMD Processors use XMM/YMM MOVDQU instructions
1781 // for Object Initialization as default
1782 if (is_amd() && cpu_family() >= 0x19) {
1783 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1784 UseFastStosb = false;
1785 }
1786 }
1787
1788 #ifdef COMPILER2
1789 if (is_intel() && MaxVectorSize > 16) {
1790 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1791 UseFastStosb = false;
1792 }
1793 }
1794 #endif
1795
1796 // Use XMM/YMM MOVDQU instruction for Object Initialization
1797 if (!UseFastStosb && UseUnalignedLoadStores) {
1798 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1799 UseXMMForObjInit = true;
1800 }
1801 } else if (UseXMMForObjInit) {
1802 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1803 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1804 }
1805
1806 #ifdef COMPILER2
1807 if (FLAG_IS_DEFAULT(AlignVector)) {
1808 // Modern processors allow misaligned memory operations for vectors.
1809 AlignVector = !UseUnalignedLoadStores;
1810 }
1811 #endif // COMPILER2
1812
1813 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1814 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1815 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1816 } else if (!supports_sse() && supports_3dnow_prefetch()) {
1817 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1818 }
1819 }
1820
1821 // Allocation prefetch settings
1822 int cache_line_size = checked_cast<int>(prefetch_data_size());
1823 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1824 (cache_line_size > AllocatePrefetchStepSize)) {
1825 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1826 }
1827
1828 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1829 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1830 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1831 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1832 }
1833 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1834 }
1835
1836 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1837 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1838 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1839 }
1840
1841 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1842 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1843 supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1844 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1845 }
1846 #ifdef COMPILER2
1847 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1848 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1849 }
1850 #endif
1851 }
1852
1853 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1854 #ifdef COMPILER2
1855 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1856 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1857 }
1858 #endif
1859 }
1860
1861 // Prefetch settings
1862
1863 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1864 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1865 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1866 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1867
1868 // gc copy/scan is disabled if prefetchw isn't supported, because
1869 // Prefetch::write emits an inlined prefetchw on Linux.
1870 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1871 // The used prefetcht0 instruction works for both amd64 and em64t.
1872
1873 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1874 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1875 }
1876 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1877 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1878 }
1879
1880 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1881 (cache_line_size > ContendedPaddingWidth))
1882 ContendedPaddingWidth = cache_line_size;
1883
1884 // This machine allows unaligned memory accesses
1885 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1886 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1887 }
1888
1889 #ifndef PRODUCT
1890 if (log_is_enabled(Info, os, cpu)) {
1891 LogStream ls(Log(os, cpu)::info());
1892 outputStream* log = &ls;
1893 log->print_cr("Logical CPUs per core: %u",
1894 logical_processors_per_package());
1895 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1896 log->print("UseSSE=%d", UseSSE);
1897 if (UseAVX > 0) {
1898 log->print(" UseAVX=%d", UseAVX);
1899 }
1900 if (UseAES) {
1901 log->print(" UseAES=1");
1902 }
1903 #ifdef COMPILER2
1904 if (MaxVectorSize > 0) {
1905 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1906 }
1907 #endif
1908 log->cr();
1909 log->print("Allocation");
1910 if (AllocatePrefetchStyle <= 0) {
1911 log->print_cr(": no prefetching");
1912 } else {
1913 log->print(" prefetching: ");
1914 if (AllocatePrefetchInstr == 0) {
1915 log->print("PREFETCHNTA");
1916 } else if (AllocatePrefetchInstr == 1) {
1917 log->print("PREFETCHT0");
1918 } else if (AllocatePrefetchInstr == 2) {
1919 log->print("PREFETCHT2");
1920 } else if (AllocatePrefetchInstr == 3) {
1921 log->print("PREFETCHW");
1922 }
1923 if (AllocatePrefetchLines > 1) {
1924 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1925 } else {
1926 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1927 }
1928 }
1929
1930 if (PrefetchCopyIntervalInBytes > 0) {
1931 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1932 }
1933 if (PrefetchScanIntervalInBytes > 0) {
1934 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1935 }
1936 if (ContendedPaddingWidth > 0) {
1937 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1938 }
1939 }
1940 #endif // !PRODUCT
1941 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1942 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1943 }
1944 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1945 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1946 }
1947 }
1948
1949 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1950 VirtualizationType vrt = VM_Version::get_detected_virtualization();
1951 if (vrt == XenHVM) {
1952 st->print_cr("Xen hardware-assisted virtualization detected");
1953 } else if (vrt == KVM) {
1954 st->print_cr("KVM virtualization detected");
1955 } else if (vrt == VMWare) {
1956 st->print_cr("VMWare virtualization detected");
1957 VirtualizationSupport::print_virtualization_info(st);
1958 } else if (vrt == HyperV) {
1959 st->print_cr("Hyper-V virtualization detected");
1960 } else if (vrt == HyperVRole) {
1961 st->print_cr("Hyper-V role detected");
1962 }
1963 }
1964
1965 bool VM_Version::compute_has_intel_jcc_erratum() {
1966 if (!is_intel_family_core()) {
1967 // Only Intel CPUs are affected.
1968 return false;
1969 }
1970 // The following table of affected CPUs is based on the following document released by Intel:
1971 // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
1972 switch (_model) {
1973 case 0x8E:
1974 // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1975 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
1976 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
1977 // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
1978 // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
1979 // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1980 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
1981 // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
1982 // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
1983 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
1984 case 0x4E:
1985 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
1986 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
1987 // 06_4E | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
1988 return _stepping == 0x3;
1989 case 0x55:
1990 // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
1991 // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
1992 // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
1993 // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
1994 // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
1995 // 06_55 | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
1996 return _stepping == 0x4 || _stepping == 0x7;
1997 case 0x5E:
1998 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
1999 // 06_5E | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
2000 return _stepping == 0x3;
2001 case 0x9E:
2002 // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
2003 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
2004 // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
2005 // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
2006 // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
2007 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
2008 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
2009 // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
2010 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
2011 // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
2012 // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
2013 // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
2014 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
2015 // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
2016 return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
2017 case 0xA5:
2018 // Not in Intel documentation.
2019 // 06_A5H | | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
2020 return true;
2021 case 0xA6:
2022 // 06_A6H | 0 | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
2023 return _stepping == 0x0;
2024 case 0xAE:
2025 // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
2026 return _stepping == 0xA;
2027 default:
2028 // If we are running on another intel machine not recognized in the table, we are okay.
2029 return false;
2030 }
2031 }
2032
2033 // On Xen, the cpuid instruction returns
2034 // eax / registers[0]: Version of Xen
2035 // ebx / registers[1]: chars 'XenV'
2036 // ecx / registers[2]: chars 'MMXe'
2037 // edx / registers[3]: chars 'nVMM'
2038 //
2039 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2040 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2041 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2042 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
2043 //
2044 // more information :
2045 // https://kb.vmware.com/s/article/1009458
2046 //
2047 void VM_Version::check_virtualizations() {
2048 uint32_t registers[4] = {0};
2049 char signature[13] = {0};
2050
2051 // Xen cpuid leaves can be found 0x100 aligned boundary starting
2052 // from 0x40000000 until 0x40010000.
2053 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2054 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2055 detect_virt_stub(leaf, registers);
2056 memcpy(signature, ®isters[1], 12);
2057
2058 if (strncmp("VMwareVMware", signature, 12) == 0) {
2059 Abstract_VM_Version::_detected_virtualization = VMWare;
2060 // check for extended metrics from guestlib
2061 VirtualizationSupport::initialize();
2062 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2063 Abstract_VM_Version::_detected_virtualization = HyperV;
2064 #ifdef _WINDOWS
2065 // CPUID leaf 0x40000007 is available to the root partition only.
2066 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2067 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2068 detect_virt_stub(0x40000007, registers);
2069 if ((registers[0] != 0x0) ||
2070 (registers[1] != 0x0) ||
2071 (registers[2] != 0x0) ||
2072 (registers[3] != 0x0)) {
2073 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2074 }
2075 #endif
2076 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2077 Abstract_VM_Version::_detected_virtualization = KVM;
2078 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2079 Abstract_VM_Version::_detected_virtualization = XenHVM;
2080 }
2081 }
2082 }
2083
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
bool VM_Version::is_default_intel_cascade_lake() {
  // Both vector-related flags must still be at their defaults, AVX-512
  // must be enabled, and the CPU must identify as Cascade Lake.
  if (!FLAG_IS_DEFAULT(UseAVX) || !FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  return UseAVX > 2 && is_intel_cascade_lake();
}
#endif
2093
2094 bool VM_Version::is_intel_cascade_lake() {
2095 return is_intel_skylake() && _stepping >= 5;
2096 }
2097
2098 bool VM_Version::is_intel_darkmont() {
2099 return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2100 }
2101
2102 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2103 // for implementing the array copy and clear operations.
2104 // The Intel platforms that supports the serialize instruction
2105 // has improved implementation of 64-byte load/stores and so the default
2106 // threshold is set to 0 for these platforms.
2107 int VM_Version::avx3_threshold() {
2108 return (is_intel_server_family() &&
2109 supports_serialize() &&
2110 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2111 }
2112
// Reset the APX probe state by invoking the generated assembly stub
// (see clear_apx_test_state_stub, created in initialize() below).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2116
// Set once VM_Version::initialize() completes; asserted by
// initialize_cpu_information() to enforce initialization order.
static bool _vm_version_initialized = false;
2118
// Generate the CPU-probing assembly stubs, run CPU feature detection, and
// detect hypervisors. Must run before any other code uses the assembler.
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate each probe stub into the blob and keep a typed function
  // pointer to it for later calls.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                     g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                     g.clear_apx_test_state());
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                     g.generate_getCPUIDBrandString());
  // Read CPUID data and derive feature flags / ergonomic defaults.
  get_processor_features();

  Assembler::precompute_instructions();

  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  // Mark initialization complete for initialize_cpu_information().
  _vm_version_initialized = true;
}
2147
// x86 family ids as reported by CPUID leaf 1.
typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;

// Bits of the extended-feature EDX word (used by cpu_is_em64t and the
// feature printing below).
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;

// Bit masks for the basic feature EDX word (std_cpuid1_edx); indexes
// correspond to the entries of _feature_edx_id below.
typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

// 12 vendor-id characters plus a NUL terminator.
const size_t VENDOR_LENGTH = 13;
// Extended brand string: 3 CPUID leaves x 4 registers x 4 bytes, plus NUL.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Lazily allocated cache used by cpu_brand_string().
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

// Rough topology estimates, filled in by resolve_cpu_information_details().
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2207
// Human-readable Intel family names, indexed by extended family id.
// Empty strings mark unassigned family ids.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};

// Human-readable AMD family names, indexed by extended family id.
// Empty strings mark unassigned family ids.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Human-readable model names for family 6 (Pentium Pro and successors),
// indexed by extended model id; the table is nullptr-terminated.
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr                 // terminator for cpu_model_description()
};

/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
// Indexed by the brand id in std_cpuid1_ebx[7:0]; nullptr-terminated.
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2344
2345
// Printable names for CPUID leaf 1 EDX feature bits, one entry per bit
// starting at bit 0. Empty strings are reserved/unnamed bits that are
// skipped by cpu_write_support_string().
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};

// Printable names for extended-function (0x80000001) EDX feature bits.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};

// Printable names for CPUID leaf 1 ECX feature bits.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};

// Printable names for extended-function (0x80000001) ECX feature bits.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2485
2486 const char* VM_Version::cpu_model_description(void) {
2487 uint32_t cpu_family = extended_cpu_family();
2488 uint32_t cpu_model = extended_cpu_model();
2489 const char* model = nullptr;
2490
2491 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2492 for (uint32_t i = 0; i <= cpu_model; i++) {
2493 model = _model_id_pentium_pro[i];
2494 if (model == nullptr) {
2495 break;
2496 }
2497 }
2498 }
2499 return model;
2500 }
2501
2502 const char* VM_Version::cpu_brand_string(void) {
2503 if (_cpu_brand_string == nullptr) {
2504 _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
2505 if (nullptr == _cpu_brand_string) {
2506 return nullptr;
2507 }
2508 int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
2509 if (ret_val != OS_OK) {
2510 FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
2511 _cpu_brand_string = nullptr;
2512 }
2513 }
2514 return _cpu_brand_string;
2515 }
2516
2517 const char* VM_Version::cpu_brand(void) {
2518 const char* brand = nullptr;
2519
2520 if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
2521 int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
2522 brand = _brand_id[0];
2523 for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
2524 brand = _brand_id[i];
2525 }
2526 }
2527 return brand;
2528 }
2529
2530 bool VM_Version::cpu_is_em64t(void) {
2531 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2532 }
2533
2534 bool VM_Version::is_netburst(void) {
2535 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2536 }
2537
2538 bool VM_Version::supports_tscinv_ext(void) {
2539 if (!supports_tscinv_bit()) {
2540 return false;
2541 }
2542
2543 if (is_intel()) {
2544 return true;
2545 }
2546
2547 if (is_amd()) {
2548 return !is_amd_Barcelona();
2549 }
2550
2551 if (is_hygon()) {
2552 return true;
2553 }
2554
2555 return false;
2556 }
2557
2558 void VM_Version::resolve_cpu_information_details(void) {
2559
2560 // in future we want to base this information on proper cpu
2561 // and cache topology enumeration such as:
2562 // Intel 64 Architecture Processor Topology Enumeration
2563 // which supports system cpu and cache topology enumeration
2564 // either using 2xAPICIDs or initial APICIDs
2565
2566 // currently only rough cpu information estimates
2567 // which will not necessarily reflect the exact configuration of the system
2568
2569 // this is the number of logical hardware threads
2570 // visible to the operating system
2571 _no_of_threads = os::processor_count();
2572
2573 // find out number of threads per cpu package
2574 int threads_per_package = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus;
2575 if (threads_per_package == 0) {
2576 // Fallback code to avoid div by zero in subsequent code.
2577 // CPUID 0Bh (ECX = 1) might return 0 on older AMD processor (EPYC 7763 at least)
2578 threads_per_package = threads_per_core() * cores_per_cpu();
2579 }
2580
2581 // use amount of threads visible to the process in order to guess number of sockets
2582 _no_of_sockets = _no_of_threads / threads_per_package;
2583
2584 // process might only see a subset of the total number of threads
2585 // from a single processor package. Virtualization/resource management for example.
2586 // If so then just write a hard 1 as num of pkgs.
2587 if (0 == _no_of_sockets) {
2588 _no_of_sockets = 1;
2589 }
2590
2591 // estimate the number of cores
2592 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2593 }
2594
2595
2596 const char* VM_Version::cpu_family_description(void) {
2597 int cpu_family_id = extended_cpu_family();
2598 if (is_amd()) {
2599 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2600 return _family_id_amd[cpu_family_id];
2601 }
2602 }
2603 if (is_intel()) {
2604 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2605 return cpu_model_description();
2606 }
2607 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2608 return _family_id_intel[cpu_family_id];
2609 }
2610 }
2611 if (is_hygon()) {
2612 return "Dhyana";
2613 }
2614 return "Unknown x86";
2615 }
2616
2617 int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
2618 assert(buf != nullptr, "buffer is null!");
2619 assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");
2620
2621 const char* cpu_type = nullptr;
2622 const char* x64 = nullptr;
2623
2624 if (is_intel()) {
2625 cpu_type = "Intel";
2626 x64 = cpu_is_em64t() ? " Intel64" : "";
2627 } else if (is_amd()) {
2628 cpu_type = "AMD";
2629 x64 = cpu_is_em64t() ? " AMD64" : "";
2630 } else if (is_hygon()) {
2631 cpu_type = "Hygon";
2632 x64 = cpu_is_em64t() ? " AMD64" : "";
2633 } else {
2634 cpu_type = "Unknown x86";
2635 x64 = cpu_is_em64t() ? " x86_64" : "";
2636 }
2637
2638 jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
2639 cpu_type,
2640 cpu_family_description(),
2641 supports_ht() ? " (HT)" : "",
2642 supports_sse3() ? " SSE3" : "",
2643 supports_ssse3() ? " SSSE3" : "",
2644 supports_sse4_1() ? " SSE4.1" : "",
2645 supports_sse4_2() ? " SSE4.2" : "",
2646 supports_sse4a() ? " SSE4A" : "",
2647 is_netburst() ? " Netburst" : "",
2648 is_intel_family_core() ? " Core" : "",
2649 x64);
2650
2651 return OS_OK;
2652 }
2653
2654 int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
2655 assert(buf != nullptr, "buffer is null!");
2656 assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
2657 assert(getCPUIDBrandString_stub != nullptr, "not initialized");
2658
2659 // invoke newly generated asm code to fetch CPU Brand String
2660 getCPUIDBrandString_stub(&_cpuid_info);
2661
2662 // fetch results into buffer
2663 *((uint32_t*) &buf[0]) = _cpuid_info.proc_name_0;
2664 *((uint32_t*) &buf[4]) = _cpuid_info.proc_name_1;
2665 *((uint32_t*) &buf[8]) = _cpuid_info.proc_name_2;
2666 *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
2667 *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
2668 *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
2669 *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
2670 *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
2671 *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
2672 *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
2673 *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
2674 *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;
2675
2676 return OS_OK;
2677 }
2678
// Write a comma-separated list of supported CPU feature names into buf,
// scanning the four CPUID feature words against the name tables above.
// Returns the number of characters written (buf_len - 1 on truncation).
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t written = 0;
  const char* prefix = "";

// Append one feature name; after the first successful write, switch the
// prefix to ", " so subsequent names are comma-separated. A negative
// jio_snprintf result indicates truncation and aborts the whole function.
#define WRITE_TO_BUF(string) \
  { \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string); \
    if (res < 0) { \
      return buf_len - 1; \
    } \
    written += res; \
    if (prefix[0] == '\0') { \
      prefix = ", "; \
    } \
  }

  // CPUID leaf 1 EDX features (bits 0..29).
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // CPUID leaf 1 ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Extended-function (0x80000001) ECX features.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Extended-function (0x80000001) EDX features.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  // Features not covered by the bit tables above.
  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  if (supports_hybrid()) {
    WRITE_TO_BUF("Hybrid Architecture");
  }

  return written;
}
2739
/**
 * Write a detailed description of the cpu to a given buffer, including
 * feature set.
 *
 * Returns OS_OK on success, OS_ERR if the header portion did not fit
 * (buf is NUL-terminated either way).
 */
int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");

  static const char* unknown = "<unknown>";
  char vendor_id[VENDOR_LENGTH];
  const char* family = nullptr;
  const char* model = nullptr;
  const char* brand = nullptr;
  int outputLen = 0;

  family = cpu_family_description();
  if (family == nullptr) {
    family = unknown;
  }

  model = cpu_model_description();
  if (model == nullptr) {
    model = unknown;
  }

  // Prefer the extended brand string; fall back to the legacy brand id.
  brand = cpu_brand_string();

  if (brand == nullptr) {
    brand = cpu_brand();
    if (brand == nullptr) {
      brand = unknown;
    }
  }

  // Assemble the 12-character vendor id. Note the fields are intentionally
  // copied out of numeric order (_0, _2, _1) — presumably matching the
  // EBX/EDX/ECX register order CPUID uses for the vendor string; confirm
  // against the CpuidInfo layout if changing this.
  *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
  *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
  *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
  vendor_id[VENDOR_LENGTH-1] = '\0';

  outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
    "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
    "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
    "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
    "Supports: ",
    brand,
    vendor_id,
    family,
    extended_cpu_family(),
    model,
    extended_cpu_model(),
    cpu_stepping(),
    _cpuid_info.std_cpuid1_eax.bits.ext_family,
    _cpuid_info.std_cpuid1_eax.bits.ext_model,
    _cpuid_info.std_cpuid1_eax.bits.proc_type,
    _cpuid_info.std_cpuid1_eax.value,
    _cpuid_info.std_cpuid1_ebx.value,
    _cpuid_info.std_cpuid1_ecx.value,
    _cpuid_info.std_cpuid1_edx.value,
    _cpuid_info.ext_cpuid1_eax,
    _cpuid_info.ext_cpuid1_ebx,
    _cpuid_info.ext_cpuid1_ecx,
    _cpuid_info.ext_cpuid1_edx);

  // Truncated or failed: terminate the buffer and report the error.
  if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
    if (buf_len > 0) { buf[buf_len-1] = '\0'; }
    return OS_ERR;
  }

  // Append the feature-name list after the header.
  cpu_write_support_string(&buf[outputLen], buf_len - outputLen);

  return OS_OK;
}
2813
2814
// Fill in Abstract_VM_Version statics (_cpu_name and _cpu_desc).
// Called exactly once, after VM_Version initialization has populated
// _cpuid_info (both conditions are asserted below).
void VM_Version::initialize_cpu_information() {
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  // Resolve derived cpu-information details first (helper defined elsewhere
  // in this file); the description writers below depend on its results.
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2826
2827 /**
2828 * For information about extracting the frequency from the cpu brand string, please see:
2829 *
2830 * Intel Processor Identification and the CPUID Instruction
2831 * Application Note 485
2832 * May 2012
2833 *
2834 * The return value is the frequency in Hz.
2835 */
2836 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2837 const char* const brand_string = cpu_brand_string();
2838 if (brand_string == nullptr) {
2839 return 0;
2840 }
2841 const int64_t MEGA = 1000000;
2842 int64_t multiplier = 0;
2843 int64_t frequency = 0;
2844 uint8_t idx = 0;
2845 // The brand string buffer is at most 48 bytes.
2846 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2847 for (; idx < 48-2; ++idx) {
2848 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2849 // Search brand string for "yHz" where y is M, G, or T.
2850 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2851 if (brand_string[idx] == 'M') {
2852 multiplier = MEGA;
2853 } else if (brand_string[idx] == 'G') {
2854 multiplier = MEGA * 1000;
2855 } else if (brand_string[idx] == 'T') {
2856 multiplier = MEGA * MEGA;
2857 }
2858 break;
2859 }
2860 }
2861 if (multiplier > 0) {
2862 // Compute frequency (in Hz) from brand string.
2863 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2864 frequency = (brand_string[idx-4] - '0') * multiplier;
2865 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2866 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2867 } else { // format is "xxxx"
2868 frequency = (brand_string[idx-4] - '0') * 1000;
2869 frequency += (brand_string[idx-3] - '0') * 100;
2870 frequency += (brand_string[idx-2] - '0') * 10;
2871 frequency += (brand_string[idx-1] - '0');
2872 frequency *= multiplier;
2873 }
2874 }
2875 return frequency;
2876 }
2877
2878
2879 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2880 if (_max_qualified_cpu_frequency == 0) {
2881 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2882 }
2883 return _max_qualified_cpu_frequency;
2884 }
2885
2886 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2887 VM_Features vm_features;
2888 if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2889 vm_features.set_feature(CPU_CX8);
2890 if (std_cpuid1_edx.bits.cmov != 0)
2891 vm_features.set_feature(CPU_CMOV);
2892 if (std_cpuid1_edx.bits.clflush != 0)
2893 vm_features.set_feature(CPU_FLUSH);
2894 // clflush should always be available on x86_64
2895 // if not we are in real trouble because we rely on it
2896 // to flush the code cache.
2897 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2898 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2899 ext_cpuid1_edx.bits.fxsr != 0))
2900 vm_features.set_feature(CPU_FXSR);
2901 // HT flag is set for multi-core processors also.
2902 if (threads_per_core() > 1)
2903 vm_features.set_feature(CPU_HT);
2904 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2905 ext_cpuid1_edx.bits.mmx != 0))
2906 vm_features.set_feature(CPU_MMX);
2907 if (std_cpuid1_edx.bits.sse != 0)
2908 vm_features.set_feature(CPU_SSE);
2909 if (std_cpuid1_edx.bits.sse2 != 0)
2910 vm_features.set_feature(CPU_SSE2);
2911 if (std_cpuid1_ecx.bits.sse3 != 0)
2912 vm_features.set_feature(CPU_SSE3);
2913 if (std_cpuid1_ecx.bits.ssse3 != 0)
2914 vm_features.set_feature(CPU_SSSE3);
2915 if (std_cpuid1_ecx.bits.sse4_1 != 0)
2916 vm_features.set_feature(CPU_SSE4_1);
2917 if (std_cpuid1_ecx.bits.sse4_2 != 0)
2918 vm_features.set_feature(CPU_SSE4_2);
2919 if (std_cpuid1_ecx.bits.popcnt != 0)
2920 vm_features.set_feature(CPU_POPCNT);
2921 if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2922 xem_xcr0_eax.bits.apx_f != 0 &&
2923 std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2924 vm_features.set_feature(CPU_APX_F);
2925 }
2926 if (std_cpuid1_ecx.bits.avx != 0 &&
2927 std_cpuid1_ecx.bits.osxsave != 0 &&
2928 xem_xcr0_eax.bits.sse != 0 &&
2929 xem_xcr0_eax.bits.ymm != 0) {
2930 vm_features.set_feature(CPU_AVX);
2931 vm_features.set_feature(CPU_VZEROUPPER);
2932 if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2933 vm_features.set_feature(CPU_SHA512);
2934 if (std_cpuid1_ecx.bits.f16c != 0)
2935 vm_features.set_feature(CPU_F16C);
2936 if (sef_cpuid7_ebx.bits.avx2 != 0) {
2937 vm_features.set_feature(CPU_AVX2);
2938 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2939 vm_features.set_feature(CPU_AVX_IFMA);
2940 }
2941 if (sef_cpuid7_ecx.bits.gfni != 0)
2942 vm_features.set_feature(CPU_GFNI);
2943 if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2944 xem_xcr0_eax.bits.opmask != 0 &&
2945 xem_xcr0_eax.bits.zmm512 != 0 &&
2946 xem_xcr0_eax.bits.zmm32 != 0) {
2947 vm_features.set_feature(CPU_AVX512F);
2948 if (sef_cpuid7_ebx.bits.avx512cd != 0)
2949 vm_features.set_feature(CPU_AVX512CD);
2950 if (sef_cpuid7_ebx.bits.avx512dq != 0)
2951 vm_features.set_feature(CPU_AVX512DQ);
2952 if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2953 vm_features.set_feature(CPU_AVX512_IFMA);
2954 if (sef_cpuid7_ebx.bits.avx512pf != 0)
2955 vm_features.set_feature(CPU_AVX512PF);
2956 if (sef_cpuid7_ebx.bits.avx512er != 0)
2957 vm_features.set_feature(CPU_AVX512ER);
2958 if (sef_cpuid7_ebx.bits.avx512bw != 0)
2959 vm_features.set_feature(CPU_AVX512BW);
2960 if (sef_cpuid7_ebx.bits.avx512vl != 0)
2961 vm_features.set_feature(CPU_AVX512VL);
2962 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2963 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2964 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2965 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2966 if (sef_cpuid7_ecx.bits.vaes != 0)
2967 vm_features.set_feature(CPU_AVX512_VAES);
2968 if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2969 vm_features.set_feature(CPU_AVX512_VNNI);
2970 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2971 vm_features.set_feature(CPU_AVX512_BITALG);
2972 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2973 vm_features.set_feature(CPU_AVX512_VBMI);
2974 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2975 vm_features.set_feature(CPU_AVX512_VBMI2);
2976 }
2977 if (is_intel()) {
2978 if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2979 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2980 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2981 xem_xcr0_eax.bits.opmask != 0 &&
2982 xem_xcr0_eax.bits.zmm512 != 0 &&
2983 xem_xcr0_eax.bits.zmm32 != 0) {
2984 vm_features.set_feature(CPU_AVX10_1);
2985 vm_features.set_feature(CPU_AVX512F);
2986 vm_features.set_feature(CPU_AVX512CD);
2987 vm_features.set_feature(CPU_AVX512DQ);
2988 vm_features.set_feature(CPU_AVX512PF);
2989 vm_features.set_feature(CPU_AVX512ER);
2990 vm_features.set_feature(CPU_AVX512BW);
2991 vm_features.set_feature(CPU_AVX512VL);
2992 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2993 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2994 vm_features.set_feature(CPU_AVX512_VAES);
2995 vm_features.set_feature(CPU_AVX512_VNNI);
2996 vm_features.set_feature(CPU_AVX512_BITALG);
2997 vm_features.set_feature(CPU_AVX512_VBMI);
2998 vm_features.set_feature(CPU_AVX512_VBMI2);
2999 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3000 vm_features.set_feature(CPU_AVX10_2);
3001 }
3002 }
3003 }
3004 }
3005
3006 if (std_cpuid1_ecx.bits.hv != 0)
3007 vm_features.set_feature(CPU_HV);
3008 if (sef_cpuid7_ebx.bits.bmi1 != 0)
3009 vm_features.set_feature(CPU_BMI1);
3010 if (std_cpuid1_edx.bits.tsc != 0)
3011 vm_features.set_feature(CPU_TSC);
3012 if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3013 vm_features.set_feature(CPU_TSCINV_BIT);
3014 if (std_cpuid1_ecx.bits.aes != 0)
3015 vm_features.set_feature(CPU_AES);
3016 if (ext_cpuid1_ecx.bits.lzcnt != 0)
3017 vm_features.set_feature(CPU_LZCNT);
3018 if (ext_cpuid1_ecx.bits.prefetchw != 0)
3019 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3020 if (sef_cpuid7_ebx.bits.erms != 0)
3021 vm_features.set_feature(CPU_ERMS);
3022 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3023 vm_features.set_feature(CPU_FSRM);
3024 if (std_cpuid1_ecx.bits.clmul != 0)
3025 vm_features.set_feature(CPU_CLMUL);
3026 if (sef_cpuid7_ebx.bits.rtm != 0)
3027 vm_features.set_feature(CPU_RTM);
3028 if (sef_cpuid7_ebx.bits.adx != 0)
3029 vm_features.set_feature(CPU_ADX);
3030 if (sef_cpuid7_ebx.bits.bmi2 != 0)
3031 vm_features.set_feature(CPU_BMI2);
3032 if (sef_cpuid7_ebx.bits.sha != 0)
3033 vm_features.set_feature(CPU_SHA);
3034 if (std_cpuid1_ecx.bits.fma != 0)
3035 vm_features.set_feature(CPU_FMA);
3036 if (sef_cpuid7_ebx.bits.clflushopt != 0)
3037 vm_features.set_feature(CPU_FLUSHOPT);
3038 if (sef_cpuid7_ebx.bits.clwb != 0)
3039 vm_features.set_feature(CPU_CLWB);
3040 if (ext_cpuid1_edx.bits.rdtscp != 0)
3041 vm_features.set_feature(CPU_RDTSCP);
3042 if (sef_cpuid7_ecx.bits.rdpid != 0)
3043 vm_features.set_feature(CPU_RDPID);
3044
3045 // AMD|Hygon additional features.
3046 if (is_amd_family()) {
3047 // PREFETCHW was checked above, check TDNOW here.
3048 if ((ext_cpuid1_edx.bits.tdnow != 0))
3049 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3050 if (ext_cpuid1_ecx.bits.sse4a != 0)
3051 vm_features.set_feature(CPU_SSE4A);
3052 }
3053
3054 // Intel additional features.
3055 if (is_intel()) {
3056 if (sef_cpuid7_edx.bits.serialize != 0)
3057 vm_features.set_feature(CPU_SERIALIZE);
3058 if (sef_cpuid7_edx.bits.hybrid != 0)
3059 vm_features.set_feature(CPU_HYBRID);
3060 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3061 vm_features.set_feature(CPU_AVX512_FP16);
3062 }
3063
3064 // ZX additional features.
3065 if (is_zx()) {
3066 // We do not know if these are supported by ZX, so we cannot trust
3067 // common CPUID bit for them.
3068 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3069 vm_features.clear_feature(CPU_CLWB);
3070 }
3071
3072 // Protection key features.
3073 if (sef_cpuid7_ecx.bits.pku != 0) {
3074 vm_features.set_feature(CPU_PKU);
3075 }
3076 if (sef_cpuid7_ecx.bits.ospke != 0) {
3077 vm_features.set_feature(CPU_OSPKE);
3078 }
3079
3080 // Control flow enforcement (CET) features.
3081 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3082 vm_features.set_feature(CPU_CET_SS);
3083 }
3084 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3085 vm_features.set_feature(CPU_CET_IBT);
3086 }
3087
3088 // Composite features.
3089 if (supports_tscinv_bit() &&
3090 ((is_amd_family() && !is_amd_Barcelona()) ||
3091 is_intel_tsc_synched_at_init())) {
3092 vm_features.set_feature(CPU_TSCINV);
3093 }
3094 return vm_features;
3095 }
3096
3097 bool VM_Version::os_supports_avx_vectors() {
3098 bool retVal = false;
3099 int nreg = 4;
3100 if (supports_evex()) {
3101 // Verify that OS save/restore all bits of EVEX registers
3102 // during signal processing.
3103 retVal = true;
3104 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3105 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3106 retVal = false;
3107 break;
3108 }
3109 }
3110 } else if (supports_avx()) {
3111 // Verify that OS save/restore all bits of AVX registers
3112 // during signal processing.
3113 retVal = true;
3114 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3115 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3116 retVal = false;
3117 break;
3118 }
3119 }
3120 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3121 if (retVal == false) {
3122 // Verify that OS save/restore all bits of EVEX registers
3123 // during signal processing.
3124 retVal = true;
3125 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3126 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3127 retVal = false;
3128 break;
3129 }
3130 }
3131 }
3132 }
3133 return retVal;
3134 }
3135
3136 bool VM_Version::os_supports_apx_egprs() {
3137 if (!supports_apx_f()) {
3138 return false;
3139 }
3140 if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3141 _cpuid_info.apx_save[1] != egpr_test_value()) {
3142 return false;
3143 }
3144 return true;
3145 }
3146
3147 uint VM_Version::cores_per_cpu() {
3148 uint result = 1;
3149 if (is_intel()) {
3150 bool supports_topology = supports_processor_topology();
3151 if (supports_topology) {
3152 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3153 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3154 }
3155 if (!supports_topology || result == 0) {
3156 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3157 }
3158 } else if (is_amd_family()) {
3159 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3160 if (cpu_family() >= 0x17) { // Zen or later
3161 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3162 }
3163 } else if (is_zx()) {
3164 bool supports_topology = supports_processor_topology();
3165 if (supports_topology) {
3166 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3167 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3168 }
3169 if (!supports_topology || result == 0) {
3170 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3171 }
3172 }
3173 return result;
3174 }
3175
3176 uint VM_Version::threads_per_core() {
3177 uint result = 1;
3178 if (is_intel() && supports_processor_topology()) {
3179 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3180 } else if (is_zx() && supports_processor_topology()) {
3181 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3182 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3183 if (cpu_family() >= 0x17) {
3184 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3185 } else {
3186 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3187 cores_per_cpu();
3188 }
3189 }
3190 return (result == 0 ? 1 : result);
3191 }
3192
3193 uint VM_Version::L1_line_size() {
3194 uint result = 0;
3195 if (is_intel()) {
3196 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3197 } else if (is_amd_family()) {
3198 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3199 } else if (is_zx()) {
3200 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3201 }
3202 if (result < 32) // not defined ?
3203 result = 32; // 32 bytes by default on x86 and other x64
3204 return result;
3205 }
3206
3207 bool VM_Version::is_intel_tsc_synched_at_init() {
3208 if (is_intel_family_core()) {
3209 uint32_t ext_model = extended_cpu_model();
3210 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3211 ext_model == CPU_MODEL_WESTMERE_EP ||
3212 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3213 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3214 // <= 2-socket invariant tsc support. EX versions are usually used
3215 // in > 2-socket systems and likely don't synchronize tscs at
3216 // initialization.
3217 // Code that uses tsc values must be prepared for them to arbitrarily
3218 // jump forward or backward.
3219 return true;
3220 }
3221 }
3222 return false;
3223 }
3224
3225 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3226 // Hardware prefetching (distance/size in bytes):
3227 // Pentium 3 - 64 / 32
3228 // Pentium 4 - 256 / 128
3229 // Athlon - 64 / 32 ????
3230 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3231 // Core - 128 / 64
3232 //
3233 // Software prefetching (distance in bytes / instruction with best score):
3234 // Pentium 3 - 128 / prefetchnta
3235 // Pentium 4 - 512 / prefetchnta
3236 // Athlon - 128 / prefetchnta
3237 // Opteron - 256 / prefetchnta
3238 // Core - 256 / prefetchnta
3239 // It will be used only when AllocatePrefetchStyle > 0
3240
3241 if (is_amd_family()) { // AMD | Hygon
3242 if (supports_sse2()) {
3243 return 256; // Opteron
3244 } else {
3245 return 128; // Athlon
3246 }
3247 } else { // Intel
3248 if (supports_sse3() && is_intel_server_family()) {
3249 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3250 return 192;
3251 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3252 return 384;
3253 }
3254 }
3255 if (supports_sse2()) {
3256 if (is_intel_server_family()) {
3257 return 256; // Pentium M, Core, Core2
3258 } else {
3259 return 512; // Pentium 4
3260 }
3261 } else {
3262 return 128; // Pentium 3 (and all other old CPUs)
3263 }
3264 }
3265 }
3266
3267 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3268 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3269 switch (id) {
3270 case vmIntrinsics::_floatToFloat16:
3271 case vmIntrinsics::_float16ToFloat:
3272 if (!supports_float16()) {
3273 return false;
3274 }
3275 break;
3276 default:
3277 break;
3278 }
3279 return true;
3280 }
3281
3282 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3283 int i = 0;
3284 ss.join([&]() {
3285 const char* str = nullptr;
3286 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3287 if (features.supports_feature((VM_Version::Feature_Flag)i)) {
3288 str = _features_names[i];
3289 }
3290 i += 1;
3291 }
3292 return str;
3293 }, ", ");
3294 }
3295
3296 void VM_Version::get_cpu_features_name(void* features_buffer, stringStream& ss) {
3297 VM_Features* features = (VM_Features*)features_buffer;
3298 insert_features_names(*features, ss);
3299 }
3300
3301 void VM_Version::get_missing_features_name(void* features_buffer, stringStream& ss) {
3302 VM_Features* features_to_test = (VM_Features*)features_buffer;
3303 int i = 0;
3304 ss.join([&]() {
3305 const char* str = nullptr;
3306 while ((i < MAX_CPU_FEATURES) && (str == nullptr)) {
3307 Feature_Flag flag = (Feature_Flag)i;
3308 if (features_to_test->supports_feature(flag) && !_features.supports_feature(flag)) {
3309 str = _features_names[i];
3310 }
3311 i += 1;
3312 }
3313 return str;
3314 }, ", ");
3315 }
3316
3317 int VM_Version::cpu_features_size() {
3318 return sizeof(VM_Features);
3319 }
3320
3321 void VM_Version::store_cpu_features(void* buf) {
3322 VM_Features copy = _features;
3323 copy.clear_feature(CPU_HT); // HT does not result in incompatibility of aot code cache
3324 memcpy(buf, ©, sizeof(VM_Features));
3325 }
3326
3327 bool VM_Version::supports_features(void* features_buffer) {
3328 VM_Features* features_to_test = (VM_Features*)features_buffer;
3329 return _features.supports_features(features_to_test);
3330 }