1 /*
2 * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25 #include "asm/macroAssembler.hpp"
26 #include "asm/macroAssembler.inline.hpp"
27 #include "classfile/vmIntrinsics.hpp"
28 #include "code/codeBlob.hpp"
29 #include "compiler/compilerDefinitions.inline.hpp"
30 #include "jvm.h"
31 #include "logging/log.hpp"
32 #include "logging/logStream.hpp"
33 #include "memory/resourceArea.hpp"
34 #include "memory/universe.hpp"
35 #include "runtime/globals_extension.hpp"
36 #include "runtime/java.hpp"
37 #include "runtime/os.inline.hpp"
38 #include "runtime/stubCodeGenerator.hpp"
39 #include "runtime/vm_version.hpp"
40 #include "utilities/checkedCast.hpp"
41 #include "utilities/ostream.hpp"
42 #include "utilities/powerOfTwo.hpp"
43 #include "utilities/virtualizationSupport.hpp"
44
// Raw CPU family/model/stepping decoded from CPUID; assigned in
// VM_Version::get_processor_features().
int VM_Version::_cpu;
int VM_Version::_model;
int VM_Version::_stepping;
bool VM_Version::_has_intel_jcc_erratum;
// Zero-initialized CPUID dump record, filled in by the generated
// get_cpu_info stub.
VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, };

// Expand CPU_FEATURE_FLAGS into a parallel table of printable feature names.
#define DECLARE_CPU_FEATURE_NAME(id, name, bit) name,
const char* VM_Version::_features_names[] = { CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_NAME)};
#undef DECLARE_CPU_FEATURE_NAME

// Address of instruction which causes SEGV
address VM_Version::_cpuinfo_segv_addr = nullptr;
// Address of instruction after the one which causes SEGV
address VM_Version::_cpuinfo_cont_addr = nullptr;
// Address of instruction which causes APX specific SEGV
address VM_Version::_cpuinfo_segv_addr_apx = nullptr;
// Address of instruction after the one which causes APX specific SEGV
address VM_Version::_cpuinfo_cont_addr_apx = nullptr;

// Code blob holding the generated CPU-detection stubs below.
static BufferBlob* stub_blob;
static const int stub_size = 2000;

// Size of the features bitmap measured in 64-bit words.
int VM_Version::VM_Features::_features_bitmap_size = sizeof(VM_Version::VM_Features::_features_bitmap) / BytesPerLong;

VM_Version::VM_Features VM_Version::_features;     // effective features (can be trimmed by VM settings)
VM_Version::VM_Features VM_Version::_cpu_features; // features exactly as reported by the hardware

// C-linkage signatures of the generated stubs.
extern "C" {
  typedef void (*get_cpu_info_stub_t)(void*);
  typedef void (*detect_virt_stub_t)(uint32_t, uint32_t*);
  typedef void (*clear_apx_test_state_t)(void);
}
static get_cpu_info_stub_t get_cpu_info_stub = nullptr;
static detect_virt_stub_t detect_virt_stub = nullptr;
static clear_apx_test_state_t clear_apx_test_state_stub = nullptr;
80
81 bool VM_Version::supports_clflush() {
82 // clflush should always be available on x86_64
83 // if not we are in real trouble because we rely on it
84 // to flush the code cache.
85 // Unfortunately, Assembler::clflush is currently called as part
86 // of generation of the code cache flush routine. This happens
87 // under Universe::init before the processor features are set
88 // up. Assembler::flush calls this routine to check that clflush
89 // is allowed. So, we give the caller a free pass if Universe init
90 // is still in progress.
91 assert ((!Universe::is_fully_initialized() || _features.supports_feature(CPU_FLUSH)), "clflush should be available");
92 return true;
93 }
94
// CPUID leaf (function) numbers used by the stubs below, notably by
// generate_getCPUIDBrandString() for the processor brand string leaves.
#define CPUID_STANDARD_FN 0x0
#define CPUID_STANDARD_FN_1 0x1
#define CPUID_STANDARD_FN_4 0x4
#define CPUID_STANDARD_FN_B 0xb

#define CPUID_EXTENDED_FN 0x80000000
#define CPUID_EXTENDED_FN_1 0x80000001
#define CPUID_EXTENDED_FN_2 0x80000002
#define CPUID_EXTENDED_FN_3 0x80000003
#define CPUID_EXTENDED_FN_4 0x80000004
#define CPUID_EXTENDED_FN_7 0x80000007
#define CPUID_EXTENDED_FN_8 0x80000008
107
// Generates the small assembly stubs used for CPU-feature detection:
// a CPUID dumper, a raw-CPUID helper for virtualization detection, a
// brand-string reader, and an APX test-state clearer. The stubs are
// deliberately generated (not written in C) because they must probe
// state — e.g. via deliberate SEGVs — that the signal handler inspects.
class VM_Version_StubGenerator: public StubCodeGenerator {
 public:

  VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {}

  // Returns the entry of a stub that zeroes the extended GPRs r16 and
  // r31. Temporarily flips the APX feature/UseAPX state around the two
  // mov64 emissions so the assembler accepts EGPR operands.
  address clear_apx_test_state() {
# define __ _masm->
    address start = __ pc();
    // EGPRs are call clobbered registers, Explicit clearing of r16 and r31 during signal
    // handling guarantees that preserved register values post signal handling were
    // re-instantiated by operating system and not because they were not modified externally.

    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    // EGPR state save/restoration.
    __ mov64(r16, 0L);
    __ mov64(r31, 0L);
    UseAPX = save_apx;
    VM_Version::clean_cpuFeatures();
    __ ret(0);
    return start;
  }

  // Returns the entry of the main detection stub:
  //   void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
  // It dumps a series of CPUID leaves into *cpuid_info and then probes
  // OS save/restore of extended register state (EGPR, YMM/ZMM) by
  // triggering a SEGV and checking register contents afterwards.
  address generate_get_cpu_info() {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);
    bool use_evex = FLAG_IS_DEFAULT(UseAVX) || (UseAVX > 2);

    // NOTE(review): apx_save_restore_warning is declared but never bound
    // or jumped to in this stub — confirm whether it is leftover.
    Label detect_486, cpu486, detect_586, std_cpuid1, std_cpuid4, std_cpuid24, std_cpuid29;
    Label sef_cpuid, sefsl1_cpuid, ext_cpuid, ext_cpuid1, ext_cpuid5, ext_cpuid7;
    Label ext_cpuid8, done, wrapup, vector_save_restore, apx_save_restore_warning;
    Label legacy_setup, save_restore_except, legacy_save_restore, start_simd_check;

    StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void get_cpu_info(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ movl(Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())), rax);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
                                      // value of at least 1, we give up and
                                      // assume a 486
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ cmpl(rax, 0xa); // Is cpuid(0xB) supported?
    __ jccb(Assembler::belowEqual, std_cpuid4);

    //
    // cpuid(0xB) Processor Topology
    //
    __ movl(rax, 0xb);
    __ xorl(rcx, rcx); // Threads level
    __ cpuid();

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 1); // Cores level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    __ movl(rax, 0xb);
    __ movl(rcx, 2); // Packages level
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid topology level
    __ orl(rax, rbx);   // eax[4:0] | ebx[0:15] == 0 indicates invalid level
    __ andl(rax, 0xffff);
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid4);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::tpl_cpuidB2_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // cpuid(0x4) Deterministic cache params
    //
    __ bind(std_cpuid4);
    __ movl(rax, 4);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x4) supported?
    __ jccb(Assembler::greater, std_cpuid1);

    __ xorl(rcx, rcx);   // L1 cache
    __ cpuid();
    __ push(rax);
    __ andl(rax, 0x1f); // Determine if valid cache parameters used
    __ orl(rax, rax);   // eax[4:0] == 0 indicates invalid cache
    __ pop(rax);
    __ jccb(Assembler::equal, std_cpuid1);

    __ lea(rsi, Address(rbp, in_bytes(VM_Version::dcp_cpuid4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Standard cpuid(0x1)
    //
    __ bind(std_cpuid1);
    __ movl(rax, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ andl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, sef_cpuid); // jump if AVX is not supported

    //
    // XCR0, XFEATURE_ENABLED_MASK register
    //
    __ xorl(rcx, rcx);   // zero for XCR0 register
    __ xgetbv();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Leaf.
    //
    __ bind(sef_cpuid);
    __ movl(rax, 7);
    __ cmpl(rax, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset()))); // Is cpuid(0x7) supported?
    __ jccb(Assembler::greater, ext_cpuid);
    // ECX = 0
    __ xorl(rcx, rcx);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi, 12), rdx);

    //
    // cpuid(0x7) Structured Extended Features Enumeration Sub-Leaf 1.
    //
    __ bind(sefsl1_cpuid);
    __ movl(rax, 7);
    __ movl(rcx, 1);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rdx);

    // NOTE(review): leaves 0x29 and 0x24 below are queried without
    // checking the maximum standard leaf first — presumably the later
    // feature decoding validates the stored values; confirm.
    //
    // cpuid(0x29) APX NCI NDD NF (EAX = 29H, ECX = 0).
    //
    __ bind(std_cpuid29);
    __ movl(rax, 0x29);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid29_offset())));
    __ movl(Address(rsi, 0), rbx);

    //
    // cpuid(0x24) Converged Vector ISA Main Leaf (EAX = 24H, ECX = 0).
    //
    __ bind(std_cpuid24);
    __ movl(rax, 0x24);
    __ movl(rcx, 0);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid24_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);

    //
    // Extended cpuid(0x80000000)
    //
    __ bind(ext_cpuid);
    __ movl(rax, 0x80000000);
    __ cpuid();
    __ cmpl(rax, 0x80000000); // Is cpuid(0x80000001) supported?
    __ jcc(Assembler::belowEqual, done);
    __ cmpl(rax, 0x80000004); // Is cpuid(0x80000005) supported?
    __ jcc(Assembler::belowEqual, ext_cpuid1);
    __ cmpl(rax, 0x80000006); // Is cpuid(0x80000007) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid5);
    __ cmpl(rax, 0x80000007); // Is cpuid(0x80000008) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid7);
    __ cmpl(rax, 0x80000008); // Is cpuid(0x80000009 and above) supported?
    __ jccb(Assembler::belowEqual, ext_cpuid8);
    __ cmpl(rax, 0x8000001E); // Is cpuid(0x8000001E) supported?
    __ jccb(Assembler::below, ext_cpuid8);
    //
    // Extended cpuid(0x8000001E)
    //
    __ movl(rax, 0x8000001E);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1E_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000008)
    //
    __ bind(ext_cpuid8);
    __ movl(rax, 0x80000008);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000007)
    //
    __ bind(ext_cpuid7);
    __ movl(rax, 0x80000007);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid7_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000005)
    //
    __ bind(ext_cpuid5);
    __ movl(rax, 0x80000005);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid5_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Extended cpuid(0x80000001)
    //
    __ bind(ext_cpuid1);
    __ movl(rax, 0x80000001);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ext_cpuid1_offset())));
    __ movl(Address(rsi, 0), rax);
    __ movl(Address(rsi, 4), rbx);
    __ movl(Address(rsi, 8), rcx);
    __ movl(Address(rsi,12), rdx);

    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports APX
    //
    // To enable APX, check CPUID.EAX=7.ECX=1.EDX[21] bit for HW support
    // and XCRO[19] bit for OS support to save/restore extended GPR state.
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
    __ movl(rax, 0x200000);
    __ andl(rax, Address(rsi, 4));
    __ jcc(Assembler::equal, vector_save_restore);
    // check _cpuid_info.xem_xcr0_eax.bits.apx_f
    __ movl(rax, 0x80000);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits apx_f
    __ jcc(Assembler::equal, vector_save_restore);

    // APX state probe: load known values into r16/r31, fault on a null
    // read, and after the signal handler resumes store the registers so
    // the caller can verify the OS preserved EGPR state.
    bool save_apx = UseAPX;
    VM_Version::set_apx_cpuFeatures();
    UseAPX = true;
    __ mov64(r16, VM_Version::egpr_test_value());
    __ mov64(r31, VM_Version::egpr_test_value());
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr_apx(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr_apx(__ pc());
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::apx_save_offset())));
    __ movq(Address(rsi, 0), r16);
    __ movq(Address(rsi, 8), r31);

    UseAPX = save_apx;
    __ bind(vector_save_restore);
    //
    // Check if OS has enabled XGETBV instruction to access XCR0
    // (OSXSAVE feature flag) and CPU supports AVX
    //
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ movl(rcx, 0x18000000); // cpuid1 bits osxsave | avx
    __ andl(rcx, Address(rsi, 8)); // cpuid1 bits osxsave | avx
    __ cmpl(rcx, 0x18000000);
    __ jccb(Assembler::notEqual, done); // jump if AVX is not supported

    __ movl(rax, 0x6);
    __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
    __ cmpl(rax, 0x6);
    __ jccb(Assembler::equal, start_simd_check); // return if AVX is not supported

    // we need to bridge farther than imm8, so we use this island as a thunk
    __ bind(done);
    __ jmp(wrapup);

    __ bind(start_simd_check);
    //
    // Some OSs have a bug when upper 128/256bits of YMM/ZMM
    // registers are not restored after a signal processing.
    // Generate SEGV here (reference through null)
    // and check upper YMM/ZMM bits after it.
    //
    int saved_useavx = UseAVX;
    int saved_usesse = UseSSE;

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      // OR check _cpuid_info.sefsl1_cpuid7_edx.bits.avx10
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sefsl1_cpuid7_offset())));
      __ movl(rbx, 0x80000);
      __ andl(rbx, Address(rsi, 4));
      __ orl(rax, rbx);
      __ jccb(Assembler::equal, legacy_setup); // jump if EVEX is not supported
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jccb(Assembler::notEqual, legacy_setup); // jump if EVEX is not supported

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_setup);
      }
      // EVEX setup: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
#ifdef _WINDOWS
      // xmm5-xmm15 are not preserved by caller on windows
      // https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm7, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm8, Assembler::AVX_512bit);
      __ subptr(rsp, 64);
      __ evmovdqul(Address(rsp, 0), xmm31, Assembler::AVX_512bit);
#endif // _WINDOWS

      // load value into all 64 bytes of zmm7 register
      __ movl(rcx, VM_Version::ymm_test_value());
      __ movdl(xmm0, rcx);
      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
      __ evmovdqul(xmm31, xmm0, Assembler::AVX_512bit);
      VM_Version::clean_cpuFeatures();
      __ jmp(save_restore_except);
    }

    __ bind(legacy_setup);
    // AVX setup
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
#ifdef _WINDOWS
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm7);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm8);
    __ subptr(rsp, 32);
    __ vmovdqu(Address(rsp, 0), xmm15);
#endif // _WINDOWS

    // load value into all 32 bytes of ymm7 register
    __ movl(rcx, VM_Version::ymm_test_value());

    __ movdl(xmm0, rcx);
    __ pshufd(xmm0, xmm0, 0x00);
    __ vinsertf128_high(xmm0, xmm0);
    __ vmovdqu(xmm7, xmm0);
    __ vmovdqu(xmm8, xmm0);
    __ vmovdqu(xmm15, xmm0);
    VM_Version::clean_cpuFeatures();

    __ bind(save_restore_except);
    __ xorl(rsi, rsi);
    VM_Version::set_cpuinfo_segv_addr(__ pc());
    // Generate SEGV
    __ movl(rax, Address(rsi, 0));

    VM_Version::set_cpuinfo_cont_addr(__ pc());
    // Returns here after signal. Save xmm0 to check it later.

    // If UseAVX is uninitialized or is set by the user to include EVEX
    if (use_evex) {
      // check _cpuid_info.sef_cpuid7_ebx.bits.avx512f
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::sef_cpuid7_offset())));
      __ movl(rax, 0x10000);
      __ andl(rax, Address(rsi, 4));
      __ jcc(Assembler::equal, legacy_save_restore);
      // check _cpuid_info.xem_xcr0_eax.bits.opmask
      // check _cpuid_info.xem_xcr0_eax.bits.zmm512
      // check _cpuid_info.xem_xcr0_eax.bits.zmm32
      __ movl(rax, 0xE0);
      __ andl(rax, Address(rbp, in_bytes(VM_Version::xem_xcr0_offset()))); // xcr0 bits sse | ymm
      __ cmpl(rax, 0xE0);
      __ jcc(Assembler::notEqual, legacy_save_restore);

      if (FLAG_IS_DEFAULT(UseAVX)) {
        __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
        __ movl(rax, Address(rsi, 0));
        __ cmpl(rax, 0x50654); // If it is Skylake
        __ jcc(Assembler::equal, legacy_save_restore);
      }
      // EVEX check: run in lowest evex mode
      VM_Version::set_evex_cpuFeatures(); // Enable temporary to pass asserts
      UseAVX = 3;
      UseSSE = 2;
      __ lea(rsi, Address(rbp, in_bytes(VM_Version::zmm_save_offset())));
      __ evmovdqul(Address(rsi, 0), xmm0, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 64), xmm7, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 128), xmm8, Assembler::AVX_512bit);
      __ evmovdqul(Address(rsi, 192), xmm31, Assembler::AVX_512bit);

#ifdef _WINDOWS
      // Restore xmm registers saved before the SEGV probe (reverse order).
      __ evmovdqul(xmm31, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm8, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
      __ evmovdqul(xmm7, Address(rsp, 0), Assembler::AVX_512bit);
      __ addptr(rsp, 64);
#endif // _WINDOWS
      generate_vzeroupper(wrapup);
      VM_Version::clean_cpuFeatures();
      UseAVX = saved_useavx;
      UseSSE = saved_usesse;
      __ jmp(wrapup);
    }

    __ bind(legacy_save_restore);
    // AVX check
    VM_Version::set_avx_cpuFeatures(); // Enable temporary to pass asserts
    UseAVX = 1;
    UseSSE = 2;
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::ymm_save_offset())));
    __ vmovdqu(Address(rsi, 0), xmm0);
    __ vmovdqu(Address(rsi, 32), xmm7);
    __ vmovdqu(Address(rsi, 64), xmm8);
    __ vmovdqu(Address(rsi, 96), xmm15);

#ifdef _WINDOWS
    // Restore xmm registers saved before the SEGV probe (reverse order).
    __ vmovdqu(xmm15, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm8, Address(rsp, 0));
    __ addptr(rsp, 32);
    __ vmovdqu(xmm7, Address(rsp, 0));
    __ addptr(rsp, 32);
#endif // _WINDOWS

    generate_vzeroupper(wrapup);
    VM_Version::clean_cpuFeatures();
    UseAVX = saved_useavx;
    UseSSE = saved_usesse;

    __ bind(wrapup);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };
  // Emits a vzeroupper (or a jump to L_wrapup) — skipped on Xeon Phi
  // family models where the instruction sequence would be harmful.
  void generate_vzeroupper(Label& L_wrapup) {
# define __ _masm->
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid0_offset())));
    __ cmpl(Address(rsi, 4), 0x756e6547); // 'uneG'
    __ jcc(Assembler::notEqual, L_wrapup);
    __ movl(rcx, 0x0FFF0FF0);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::std_cpuid1_offset())));
    __ andl(rcx, Address(rsi, 0));
    __ cmpl(rcx, 0x00050670); // If it is Xeon Phi 3200/5200/7200
    __ jcc(Assembler::equal, L_wrapup);
    __ cmpl(rcx, 0x00080650); // If it is Future Xeon Phi
    __ jcc(Assembler::equal, L_wrapup);
    // vzeroupper() will use a pre-computed instruction sequence that we
    // can't compute until after we've determined CPU capabilities. Use
    // uncached variant here directly to be able to bootstrap correctly
    __ vzeroupper_uncached();
# undef __
  }
  // Returns the entry of a stub that executes CPUID with the caller's
  // leaf (first argument) and stores eax/ebx/ecx/edx into the uint32_t
  // array passed as the second argument:
  //   void detect_virt(uint32_t leaf, uint32_t* regs);
  address generate_detect_virt() {
    StubCodeMark mark(this, "VM_Version", "detect_virt_stub");
# define __ _masm->

    address start = __ pc();

    // Evacuate callee-saved registers
    __ push(rbp);
    __ push(rbx);
    __ push(rsi); // for Windows

    __ mov(rax, c_rarg0); // CPUID leaf
    __ mov(rsi, c_rarg1); // register array address (eax, ebx, ecx, edx)

    __ cpuid();

    // Store result to register array
    __ movl(Address(rsi,  0), rax);
    __ movl(Address(rsi,  4), rbx);
    __ movl(Address(rsi,  8), rcx);
    __ movl(Address(rsi, 12), rdx);

    // Epilogue
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };


  // Returns the entry of a stub that reads the 48-byte processor brand
  // string via extended CPUID leaves 0x80000002..0x80000004 into the
  // proc_name_* slots of the CpuidInfo record passed as first argument.
  address generate_getCPUIDBrandString(void) {
    // Flags to test CPU type.
    const uint32_t HS_EFL_AC = 0x40000;
    const uint32_t HS_EFL_ID = 0x200000;
    // Values for when we don't have a CPUID instruction.
    const int CPU_FAMILY_SHIFT = 8;
    const uint32_t CPU_FAMILY_386 = (3 << CPU_FAMILY_SHIFT);
    const uint32_t CPU_FAMILY_486 = (4 << CPU_FAMILY_SHIFT);

    Label detect_486, cpu486, detect_586, done, ext_cpuid;

    StubCodeMark mark(this, "VM_Version", "getCPUIDNameInfo_stub");
# define __ _masm->

    address start = __ pc();

    //
    // void getCPUIDBrandString(VM_Version::CpuidInfo* cpuid_info);
    //
    // rcx and rdx are first and second argument registers on windows

    __ push(rbp);
    __ mov(rbp, c_rarg0); // cpuid_info address
    __ push(rbx);
    __ push(rsi);
    __ pushf(); // preserve rbx, and flags
    __ pop(rax);
    __ push(rax);
    __ mov(rcx, rax);
    //
    // if we are unable to change the AC flag, we have a 386
    //
    __ xorl(rax, HS_EFL_AC);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rax, rcx);
    __ jccb(Assembler::notEqual, detect_486);

    __ movl(rax, CPU_FAMILY_386);
    __ jmp(done);

    //
    // If we are unable to change the ID flag, we have a 486 which does
    // not support the "cpuid" instruction.
    //
    __ bind(detect_486);
    __ mov(rax, rcx);
    __ xorl(rax, HS_EFL_ID);
    __ push(rax);
    __ popf();
    __ pushf();
    __ pop(rax);
    __ cmpptr(rcx, rax);
    __ jccb(Assembler::notEqual, detect_586);

    __ bind(cpu486);
    __ movl(rax, CPU_FAMILY_486);
    __ jmp(done);

    //
    // At this point, we have a chip which supports the "cpuid" instruction
    //
    __ bind(detect_586);
    __ xorl(rax, rax);
    __ cpuid();
    __ orl(rax, rax);
    __ jcc(Assembler::equal, cpu486); // if cpuid doesn't support an input
                                      // value of at least 1, we give up and
                                      // assume a 486

    //
    // Extended cpuid(0x80000000) for processor brand string detection
    //
    __ bind(ext_cpuid);
    __ movl(rax, CPUID_EXTENDED_FN);
    __ cpuid();
    __ cmpl(rax, CPUID_EXTENDED_FN_4);
    __ jcc(Assembler::below, done);

    //
    // Extended cpuid(0x80000002) // first 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_2);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_0_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_1_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_2_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_3_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000003) // next 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_3);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_4_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_5_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_6_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_7_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // Extended cpuid(0x80000004) // last 16 bytes in brand string
    //
    __ movl(rax, CPUID_EXTENDED_FN_4);
    __ cpuid();
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_8_offset())));
    __ movl(Address(rsi, 0), rax);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_9_offset())));
    __ movl(Address(rsi, 0), rbx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_10_offset())));
    __ movl(Address(rsi, 0), rcx);
    __ lea(rsi, Address(rbp, in_bytes(VM_Version::proc_name_11_offset())));
    __ movl(Address(rsi,0), rdx);

    //
    // return
    //
    __ bind(done);
    __ popf();
    __ pop(rsi);
    __ pop(rbx);
    __ pop(rbp);
    __ ret(0);

# undef __

    return start;
  };
};
858
859 void VM_Version::get_processor_features() {
860
861 _cpu = 4; // 486 by default
862 _model = 0;
863 _stepping = 0;
864 _logical_processors_per_package = 1;
865 // i486 internal cache is both I&D and has a 16-byte line size
866 _L1_data_cache_line_size = 16;
867
868 // Get raw processor info
869
870 get_cpu_info_stub(&_cpuid_info);
871
872 assert_is_initialized();
873 _cpu = extended_cpu_family();
874 _model = extended_cpu_model();
875 _stepping = cpu_stepping();
876
877 if (cpu_family() > 4) { // it supports CPUID
878 _features = _cpuid_info.feature_flags(); // These can be changed by VM settings
879 _cpu_features = _features; // Preserve features
880 // Logical processors are only available on P4s and above,
881 // and only if hyperthreading is available.
882 _logical_processors_per_package = logical_processor_count();
883 _L1_data_cache_line_size = L1_line_size();
884 }
885
886 // xchg and xadd instructions
887 _supports_atomic_getset4 = true;
888 _supports_atomic_getadd4 = true;
889 _supports_atomic_getset8 = true;
890 _supports_atomic_getadd8 = true;
891
892 // OS should support SSE for x64 and hardware should support at least SSE2.
893 if (!VM_Version::supports_sse2()) {
894 vm_exit_during_initialization("Unknown x64 processor: SSE2 not supported");
895 }
896 // in 64 bit the use of SSE2 is the minimum
897 if (UseSSE < 2) UseSSE = 2;
898
899 // flush_icache_stub have to be generated first.
900 // That is why Icache line size is hard coded in ICache class,
901 // see icache_x86.hpp. It is also the reason why we can't use
902 // clflush instruction in 32-bit VM since it could be running
903 // on CPU which does not support it.
904 //
905 // The only thing we can do is to verify that flushed
906 // ICache::line_size has correct value.
907 guarantee(_cpuid_info.std_cpuid1_edx.bits.clflush != 0, "clflush is not supported");
908 // clflush_size is size in quadwords (8 bytes).
909 guarantee(_cpuid_info.std_cpuid1_ebx.bits.clflush_size == 8, "such clflush size is not supported");
910
911 // assigning this field effectively enables Unsafe.writebackMemory()
912 // by initing UnsafeConstant.DATA_CACHE_LINE_FLUSH_SIZE to non-zero
913 // that is only implemented on x86_64 and only if the OS plays ball
914 if (os::supports_map_sync()) {
915 // publish data cache line flush size to generic field, otherwise
916 // let if default to zero thereby disabling writeback
917 _data_cache_line_flush_size = _cpuid_info.std_cpuid1_ebx.bits.clflush_size * 8;
918 }
919
920 // Check if processor has Intel Ecore
921 if (FLAG_IS_DEFAULT(EnableX86ECoreOpts) && is_intel() && is_intel_server_family() &&
922 (_model == 0x97 || _model == 0xAA || _model == 0xAC || _model == 0xAF ||
923 _model == 0xCC || _model == 0xDD)) {
924 FLAG_SET_DEFAULT(EnableX86ECoreOpts, true);
925 }
926
927 if (UseSSE < 4) {
928 _features.clear_feature(CPU_SSE4_1);
929 _features.clear_feature(CPU_SSE4_2);
930 }
931
932 if (UseSSE < 3) {
933 _features.clear_feature(CPU_SSE3);
934 _features.clear_feature(CPU_SSSE3);
935 _features.clear_feature(CPU_SSE4A);
936 }
937
938 if (UseSSE < 2)
939 _features.clear_feature(CPU_SSE2);
940
941 if (UseSSE < 1)
942 _features.clear_feature(CPU_SSE);
943
944 // Since AVX instructions are slower than SSE on some ZX CPUs, force UseAVX=0.
945 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7))) {
946 UseAVX = 0;
947 }
948
949 // UseSSE is set to the smaller of what hardware supports and what
950 // the command line requires. I.e., you cannot set UseSSE to 2 on
951 // older Pentiums which do not support it.
952 int use_sse_limit = 0;
953 if (UseSSE > 0) {
954 if (UseSSE > 3 && supports_sse4_1()) {
955 use_sse_limit = 4;
956 } else if (UseSSE > 2 && supports_sse3()) {
957 use_sse_limit = 3;
958 } else if (UseSSE > 1 && supports_sse2()) {
959 use_sse_limit = 2;
960 } else if (UseSSE > 0 && supports_sse()) {
961 use_sse_limit = 1;
962 } else {
963 use_sse_limit = 0;
964 }
965 }
966 if (FLAG_IS_DEFAULT(UseSSE)) {
967 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
968 } else if (UseSSE > use_sse_limit) {
969 warning("UseSSE=%d is not supported on this CPU, setting it to UseSSE=%d", UseSSE, use_sse_limit);
970 FLAG_SET_DEFAULT(UseSSE, use_sse_limit);
971 }
972
973 // first try initial setting and detect what we can support
974 int use_avx_limit = 0;
975 if (UseAVX > 0) {
976 if (UseSSE < 4) {
977 // Don't use AVX if SSE is unavailable or has been disabled.
978 use_avx_limit = 0;
979 } else if (UseAVX > 2 && supports_evex()) {
980 use_avx_limit = 3;
981 } else if (UseAVX > 1 && supports_avx2()) {
982 use_avx_limit = 2;
983 } else if (UseAVX > 0 && supports_avx()) {
984 use_avx_limit = 1;
985 } else {
986 use_avx_limit = 0;
987 }
988 }
989 if (FLAG_IS_DEFAULT(UseAVX)) {
990 // Don't use AVX-512 on older Skylakes unless explicitly requested.
991 if (use_avx_limit > 2 && is_intel_skylake() && _stepping < 5) {
992 FLAG_SET_DEFAULT(UseAVX, 2);
993 } else {
994 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
995 }
996 }
997
998 if (UseAVX > use_avx_limit) {
999 if (UseSSE < 4) {
1000 warning("UseAVX=%d requires UseSSE=4, setting it to UseAVX=0", UseAVX);
1001 } else {
1002 warning("UseAVX=%d is not supported on this CPU, setting it to UseAVX=%d", UseAVX, use_avx_limit);
1003 }
1004 FLAG_SET_DEFAULT(UseAVX, use_avx_limit);
1005 }
1006
1007 if (UseAVX < 3) {
1008 _features.clear_feature(CPU_AVX512F);
1009 _features.clear_feature(CPU_AVX512DQ);
1010 _features.clear_feature(CPU_AVX512CD);
1011 _features.clear_feature(CPU_AVX512BW);
1012 _features.clear_feature(CPU_AVX512ER);
1013 _features.clear_feature(CPU_AVX512PF);
1014 _features.clear_feature(CPU_AVX512VL);
1015 _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1016 _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1017 _features.clear_feature(CPU_AVX512_VAES);
1018 _features.clear_feature(CPU_AVX512_VNNI);
1019 _features.clear_feature(CPU_AVX512_VBMI);
1020 _features.clear_feature(CPU_AVX512_VBMI2);
1021 _features.clear_feature(CPU_AVX512_BITALG);
1022 _features.clear_feature(CPU_AVX512_IFMA);
1023 _features.clear_feature(CPU_APX_F);
1024 _features.clear_feature(CPU_AVX512_FP16);
1025 _features.clear_feature(CPU_AVX10_1);
1026 _features.clear_feature(CPU_AVX10_2);
1027 }
1028
1029
1030 if (UseAVX < 2) {
1031 _features.clear_feature(CPU_AVX2);
1032 _features.clear_feature(CPU_AVX_IFMA);
1033 }
1034
1035 if (UseAVX < 1) {
1036 _features.clear_feature(CPU_AVX);
1037 _features.clear_feature(CPU_VZEROUPPER);
1038 _features.clear_feature(CPU_F16C);
1039 _features.clear_feature(CPU_SHA512);
1040 }
1041
1042 if (logical_processors_per_package() == 1) {
1043 // HT processor could be installed on a system which doesn't support HT.
1044 _features.clear_feature(CPU_HT);
1045 }
1046
1047 if (is_intel()) { // Intel cpus specific settings
1048 if (is_knights_family()) {
1049 _features.clear_feature(CPU_VZEROUPPER);
1050 _features.clear_feature(CPU_AVX512BW);
1051 _features.clear_feature(CPU_AVX512VL);
1052 _features.clear_feature(CPU_APX_F);
1053 _features.clear_feature(CPU_AVX512DQ);
1054 _features.clear_feature(CPU_AVX512_VNNI);
1055 _features.clear_feature(CPU_AVX512_VAES);
1056 _features.clear_feature(CPU_AVX512_VPOPCNTDQ);
1057 _features.clear_feature(CPU_AVX512_VPCLMULQDQ);
1058 _features.clear_feature(CPU_AVX512_VBMI);
1059 _features.clear_feature(CPU_AVX512_VBMI2);
1060 _features.clear_feature(CPU_CLWB);
1061 _features.clear_feature(CPU_FLUSHOPT);
1062 _features.clear_feature(CPU_GFNI);
1063 _features.clear_feature(CPU_AVX512_BITALG);
1064 _features.clear_feature(CPU_AVX512_IFMA);
1065 _features.clear_feature(CPU_AVX_IFMA);
1066 _features.clear_feature(CPU_AVX512_FP16);
1067 _features.clear_feature(CPU_AVX10_1);
1068 _features.clear_feature(CPU_AVX10_2);
1069 }
1070 }
1071
1072 // Currently APX support is only enabled for targets supporting AVX512VL feature.
1073 bool apx_supported = os_supports_apx_egprs() && supports_apx_f() && supports_avx512vl();
1074 if (UseAPX && !apx_supported) {
1075 warning("UseAPX is not supported on this CPU, setting it to false");
1076 FLAG_SET_DEFAULT(UseAPX, false);
1077 }
1078
1079 if (!UseAPX) {
1080 _features.clear_feature(CPU_APX_F);
1081 }
1082
1083 if (FLAG_IS_DEFAULT(IntelJccErratumMitigation)) {
1084 _has_intel_jcc_erratum = compute_has_intel_jcc_erratum();
1085 FLAG_SET_ERGO(IntelJccErratumMitigation, _has_intel_jcc_erratum);
1086 } else {
1087 _has_intel_jcc_erratum = IntelJccErratumMitigation;
1088 }
1089
1090 assert(supports_clflush(), "Always present");
1091 if (X86ICacheSync == -1) {
1092 // Auto-detect, choosing the best performant one that still flushes
1093 // the cache. We could switch to CPUID/SERIALIZE ("4"/"5") going forward.
1094 if (supports_clwb()) {
1095 FLAG_SET_ERGO(X86ICacheSync, 3);
1096 } else if (supports_clflushopt()) {
1097 FLAG_SET_ERGO(X86ICacheSync, 2);
1098 } else {
1099 FLAG_SET_ERGO(X86ICacheSync, 1);
1100 }
1101 } else {
1102 if ((X86ICacheSync == 2) && !supports_clflushopt()) {
1103 vm_exit_during_initialization("CPU does not support CLFLUSHOPT, unable to use X86ICacheSync=2");
1104 }
1105 if ((X86ICacheSync == 3) && !supports_clwb()) {
1106 vm_exit_during_initialization("CPU does not support CLWB, unable to use X86ICacheSync=3");
1107 }
1108 if ((X86ICacheSync == 5) && !supports_serialize()) {
1109 vm_exit_during_initialization("CPU does not support SERIALIZE, unable to use X86ICacheSync=5");
1110 }
1111 }
1112
1113 stringStream ss(2048);
1114 if (supports_hybrid()) {
1115 ss.print("(hybrid)");
1116 } else {
1117 ss.print("(%u cores per cpu, %u threads per core)", cores_per_cpu(), threads_per_core());
1118 }
1119 ss.print(" family %d model %d stepping %d microcode 0x%x",
1120 cpu_family(), _model, _stepping, os::cpu_microcode_revision());
1121 ss.print(", ");
1122 int features_offset = (int)ss.size();
1123 insert_features_names(_features, ss);
1124
1125 _cpu_info_string = ss.as_string(true);
1126 _features_string = _cpu_info_string + features_offset;
1127
1128 // Use AES instructions if available.
1129 if (supports_aes()) {
1130 if (FLAG_IS_DEFAULT(UseAES)) {
1131 FLAG_SET_DEFAULT(UseAES, true);
1132 }
1133 if (!UseAES) {
1134 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1135 warning("AES intrinsics require UseAES flag to be enabled. Intrinsics will be disabled.");
1136 }
1137 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1138 } else {
1139 if (UseSSE > 2) {
1140 if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1141 FLAG_SET_DEFAULT(UseAESIntrinsics, true);
1142 }
1143 } else {
1144 // The AES intrinsic stubs require AES instruction support (of course)
1145 // but also require sse3 mode or higher for the instructions they use.
1146 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1147 warning("X86 AES intrinsics require SSE3 instructions or higher. Intrinsics will be disabled.");
1148 }
1149 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1150 }
1151
1152 // --AES-CTR begins--
1153 if (!UseAESIntrinsics) {
1154 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1155 warning("AES-CTR intrinsics require UseAESIntrinsics flag to be enabled. Intrinsics will be disabled.");
1156 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1157 }
1158 } else {
1159 if (supports_sse4_1()) {
1160 if (FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1161 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, true);
1162 }
1163 } else {
1164 // The AES-CTR intrinsic stubs require AES instruction support (of course)
1165 // but also require sse4.1 mode or higher for the instructions they use.
1166 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1167 warning("X86 AES-CTR intrinsics require SSE4.1 instructions or higher. Intrinsics will be disabled.");
1168 }
1169 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1170 }
1171 }
1172 // --AES-CTR ends--
1173 }
1174 } else if (UseAES || UseAESIntrinsics || UseAESCTRIntrinsics) {
1175 if (UseAES && !FLAG_IS_DEFAULT(UseAES)) {
1176 warning("AES instructions are not available on this CPU");
1177 FLAG_SET_DEFAULT(UseAES, false);
1178 }
1179 if (UseAESIntrinsics && !FLAG_IS_DEFAULT(UseAESIntrinsics)) {
1180 warning("AES intrinsics are not available on this CPU");
1181 FLAG_SET_DEFAULT(UseAESIntrinsics, false);
1182 }
1183 if (UseAESCTRIntrinsics && !FLAG_IS_DEFAULT(UseAESCTRIntrinsics)) {
1184 warning("AES-CTR intrinsics are not available on this CPU");
1185 FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
1186 }
1187 }
1188
1189 // Use CLMUL instructions if available.
1190 if (supports_clmul()) {
1191 if (FLAG_IS_DEFAULT(UseCLMUL)) {
1192 UseCLMUL = true;
1193 }
1194 } else if (UseCLMUL) {
1195 if (!FLAG_IS_DEFAULT(UseCLMUL))
1196 warning("CLMUL instructions not available on this CPU (AVX may also be required)");
1197 FLAG_SET_DEFAULT(UseCLMUL, false);
1198 }
1199
1200 if (UseCLMUL && (UseSSE > 2)) {
1201 if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
1202 UseCRC32Intrinsics = true;
1203 }
1204 } else if (UseCRC32Intrinsics) {
1205 if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics))
1206 warning("CRC32 Intrinsics requires CLMUL instructions (not available on this CPU)");
1207 FLAG_SET_DEFAULT(UseCRC32Intrinsics, false);
1208 }
1209
1210 if (supports_avx2()) {
1211 if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1212 UseAdler32Intrinsics = true;
1213 }
1214 } else if (UseAdler32Intrinsics) {
1215 if (!FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
1216 warning("Adler32 Intrinsics requires avx2 instructions (not available on this CPU)");
1217 }
1218 FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
1219 }
1220
1221 if (supports_sse4_2() && supports_clmul()) {
1222 if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1223 UseCRC32CIntrinsics = true;
1224 }
1225 } else if (UseCRC32CIntrinsics) {
1226 if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
1227 warning("CRC32C intrinsics are not available on this CPU");
1228 }
1229 FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
1230 }
1231
1232 // GHASH/GCM intrinsics
1233 if (UseCLMUL && (UseSSE > 2)) {
1234 if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
1235 UseGHASHIntrinsics = true;
1236 }
1237 } else if (UseGHASHIntrinsics) {
1238 if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
1239 warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
1240 FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
1241 }
1242
1243 // ChaCha20 Intrinsics
1244 // As long as the system supports AVX as a baseline we can do a
1245 // SIMD-enabled block function. StubGenerator makes the determination
1246 // based on the VM capabilities whether to use an AVX2 or AVX512-enabled
1247 // version.
1248 if (UseAVX >= 1) {
1249 if (FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1250 UseChaCha20Intrinsics = true;
1251 }
1252 } else if (UseChaCha20Intrinsics) {
1253 if (!FLAG_IS_DEFAULT(UseChaCha20Intrinsics)) {
1254 warning("ChaCha20 intrinsic requires AVX instructions");
1255 }
1256 FLAG_SET_DEFAULT(UseChaCha20Intrinsics, false);
1257 }
1258
1259 // Kyber Intrinsics
1260 // Currently we only have them for AVX512
1261 #ifdef _LP64
1262 if (supports_evex() && supports_avx512bw()) {
1263 if (FLAG_IS_DEFAULT(UseKyberIntrinsics)) {
1264 UseKyberIntrinsics = true;
1265 }
1266 } else
1267 #endif
1268 if (UseKyberIntrinsics) {
1269 warning("Intrinsics for ML-KEM are not available on this CPU.");
1270 FLAG_SET_DEFAULT(UseKyberIntrinsics, false);
1271 }
1272
1273 // Dilithium Intrinsics
1274 // Currently we only have them for AVX512
1275 if (supports_evex() && supports_avx512bw()) {
1276 if (FLAG_IS_DEFAULT(UseDilithiumIntrinsics)) {
1277 UseDilithiumIntrinsics = true;
1278 }
1279 } else if (UseDilithiumIntrinsics) {
1280 warning("Intrinsics for ML-DSA are not available on this CPU.");
1281 FLAG_SET_DEFAULT(UseDilithiumIntrinsics, false);
1282 }
1283
1284 // Base64 Intrinsics (Check the condition for which the intrinsic will be active)
1285 if (UseAVX >= 2) {
1286 if (FLAG_IS_DEFAULT(UseBASE64Intrinsics)) {
1287 UseBASE64Intrinsics = true;
1288 }
1289 } else if (UseBASE64Intrinsics) {
1290 if (!FLAG_IS_DEFAULT(UseBASE64Intrinsics))
1291 warning("Base64 intrinsic requires EVEX instructions on this CPU");
1292 FLAG_SET_DEFAULT(UseBASE64Intrinsics, false);
1293 }
1294
1295 if (supports_fma()) {
1296 if (FLAG_IS_DEFAULT(UseFMA)) {
1297 UseFMA = true;
1298 }
1299 } else if (UseFMA) {
1300 warning("FMA instructions are not available on this CPU");
1301 FLAG_SET_DEFAULT(UseFMA, false);
1302 }
1303
1304 if (FLAG_IS_DEFAULT(UseMD5Intrinsics)) {
1305 UseMD5Intrinsics = true;
1306 }
1307
1308 if (supports_sha() || (supports_avx2() && supports_bmi2())) {
1309 if (FLAG_IS_DEFAULT(UseSHA)) {
1310 UseSHA = true;
1311 }
1312 } else if (UseSHA) {
1313 warning("SHA instructions are not available on this CPU");
1314 FLAG_SET_DEFAULT(UseSHA, false);
1315 }
1316
1317 if (supports_sha() && supports_sse4_1() && UseSHA) {
1318 if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
1319 FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
1320 }
1321 } else if (UseSHA1Intrinsics) {
1322 warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
1323 FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
1324 }
1325
1326 if (supports_sse4_1() && UseSHA) {
1327 if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
1328 FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
1329 }
1330 } else if (UseSHA256Intrinsics) {
1331 warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
1332 FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
1333 }
1334
1335 if (UseSHA && supports_avx2() && (supports_bmi2() || supports_sha512())) {
1336 if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
1337 FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
1338 }
1339 } else if (UseSHA512Intrinsics) {
1340 warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
1341 FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
1342 }
1343
1344 if (supports_evex() && supports_avx512bw()) {
1345 if (FLAG_IS_DEFAULT(UseSHA3Intrinsics)) {
1346 UseSHA3Intrinsics = true;
1347 }
1348 } else if (UseSHA3Intrinsics) {
1349 warning("Intrinsics for SHA3-224, SHA3-256, SHA3-384 and SHA3-512 crypto hash functions not available on this CPU.");
1350 FLAG_SET_DEFAULT(UseSHA3Intrinsics, false);
1351 }
1352
1353 if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
1354 FLAG_SET_DEFAULT(UseSHA, false);
1355 }
1356
1357 #if COMPILER2_OR_JVMCI
1358 int max_vector_size = 0;
1359 if (UseAVX == 0 || !os_supports_avx_vectors()) {
1360 // 16 byte vectors (in XMM) are supported with SSE2+
1361 max_vector_size = 16;
1362 } else if (UseAVX == 1 || UseAVX == 2) {
1363 // 32 bytes vectors (in YMM) are only supported with AVX+
1364 max_vector_size = 32;
1365 } else if (UseAVX > 2) {
1366 // 64 bytes vectors (in ZMM) are only supported with AVX 3
1367 max_vector_size = 64;
1368 }
1369
1370 int min_vector_size = 4; // We require MaxVectorSize to be at least 4 on 64bit
1371
1372 if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
1373 if (MaxVectorSize < min_vector_size) {
1374 warning("MaxVectorSize must be at least %i on this platform", min_vector_size);
1375 FLAG_SET_DEFAULT(MaxVectorSize, min_vector_size);
1376 }
1377 if (MaxVectorSize > max_vector_size) {
1378 warning("MaxVectorSize must be at most %i on this platform", max_vector_size);
1379 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1380 }
1381 if (!is_power_of_2(MaxVectorSize)) {
1382 warning("MaxVectorSize must be a power of 2, setting to default: %i", max_vector_size);
1383 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1384 }
1385 } else {
1386 // If default, use highest supported configuration
1387 FLAG_SET_DEFAULT(MaxVectorSize, max_vector_size);
1388 }
1389
1390 #if defined(COMPILER2) && defined(ASSERT)
1391 if (MaxVectorSize > 0) {
1392 if (supports_avx() && PrintMiscellaneous && Verbose && TraceNewVectors) {
1393 tty->print_cr("State of YMM registers after signal handle:");
1394 int nreg = 4;
1395 const char* ymm_name[4] = {"0", "7", "8", "15"};
1396 for (int i = 0; i < nreg; i++) {
1397 tty->print("YMM%s:", ymm_name[i]);
1398 for (int j = 7; j >=0; j--) {
1399 tty->print(" %x", _cpuid_info.ymm_save[i*8 + j]);
1400 }
1401 tty->cr();
1402 }
1403 }
1404 }
1405 #endif // COMPILER2 && ASSERT
1406
1407 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1408 if (FLAG_IS_DEFAULT(UsePoly1305Intrinsics)) {
1409 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, true);
1410 }
1411 } else if (UsePoly1305Intrinsics) {
1412 warning("Intrinsics for Poly1305 crypto hash functions not available on this CPU.");
1413 FLAG_SET_DEFAULT(UsePoly1305Intrinsics, false);
1414 }
1415
1416 if ((supports_avx512ifma() && supports_avx512vlbw()) || supports_avxifma()) {
1417 if (FLAG_IS_DEFAULT(UseIntPolyIntrinsics)) {
1418 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, true);
1419 }
1420 } else if (UseIntPolyIntrinsics) {
1421 warning("Intrinsics for Polynomial crypto functions not available on this CPU.");
1422 FLAG_SET_DEFAULT(UseIntPolyIntrinsics, false);
1423 }
1424
1425 if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
1426 UseMultiplyToLenIntrinsic = true;
1427 }
1428 if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
1429 UseSquareToLenIntrinsic = true;
1430 }
1431 if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
1432 UseMulAddIntrinsic = true;
1433 }
1434 if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
1435 UseMontgomeryMultiplyIntrinsic = true;
1436 }
1437 if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
1438 UseMontgomerySquareIntrinsic = true;
1439 }
1440 #endif // COMPILER2_OR_JVMCI
1441
1442 // On new cpus instructions which update whole XMM register should be used
1443 // to prevent partial register stall due to dependencies on high half.
1444 //
1445 // UseXmmLoadAndClearUpper == true --> movsd(xmm, mem)
1446 // UseXmmLoadAndClearUpper == false --> movlpd(xmm, mem)
1447 // UseXmmRegToRegMoveAll == true --> movaps(xmm, xmm), movapd(xmm, xmm).
1448 // UseXmmRegToRegMoveAll == false --> movss(xmm, xmm), movsd(xmm, xmm).
1449
1450
1451 if (is_zx()) { // ZX cpus specific settings
1452 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1453 UseStoreImmI16 = false; // don't use it on ZX cpus
1454 }
1455 if ((cpu_family() == 6) || (cpu_family() == 7)) {
1456 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1457 // Use it on all ZX cpus
1458 UseAddressNop = true;
1459 }
1460 }
1461 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1462 UseXmmLoadAndClearUpper = true; // use movsd on all ZX cpus
1463 }
1464 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1465 if (supports_sse3()) {
1466 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new ZX cpus
1467 } else {
1468 UseXmmRegToRegMoveAll = false;
1469 }
1470 }
1471 if (((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse3()) { // new ZX cpus
1472 #ifdef COMPILER2
1473 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1474 // For new ZX cpus do the next optimization:
1475 // don't align the beginning of a loop if there are enough instructions
1476 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1477 // in current fetch line (OptoLoopAlignment) or the padding
1478 // is big (> MaxLoopPad).
1479 // Set MaxLoopPad to 11 for new ZX cpus to reduce number of
1480 // generated NOP instructions. 11 is the largest size of one
1481 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1482 MaxLoopPad = 11;
1483 }
1484 #endif // COMPILER2
1485 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1486 UseXMMForArrayCopy = true; // use SSE2 movq on new ZX cpus
1487 }
1488 if (supports_sse4_2()) { // new ZX cpus
1489 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1490 UseUnalignedLoadStores = true; // use movdqu on newest ZX cpus
1491 }
1492 }
1493 }
1494
1495 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1496 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1497 }
1498 }
1499
1500 if (is_amd_family()) { // AMD cpus specific settings
1501 if (supports_sse2() && FLAG_IS_DEFAULT(UseAddressNop)) {
1502 // Use it on new AMD cpus starting from Opteron.
1503 UseAddressNop = true;
1504 }
1505 if (supports_sse2() && FLAG_IS_DEFAULT(UseNewLongLShift)) {
1506 // Use it on new AMD cpus starting from Opteron.
1507 UseNewLongLShift = true;
1508 }
1509 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1510 if (supports_sse4a()) {
1511 UseXmmLoadAndClearUpper = true; // use movsd only on '10h' Opteron
1512 } else {
1513 UseXmmLoadAndClearUpper = false;
1514 }
1515 }
1516 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1517 if (supports_sse4a()) {
1518 UseXmmRegToRegMoveAll = true; // use movaps, movapd only on '10h'
1519 } else {
1520 UseXmmRegToRegMoveAll = false;
1521 }
1522 }
1523 if (FLAG_IS_DEFAULT(UseXmmI2F)) {
1524 if (supports_sse4a()) {
1525 UseXmmI2F = true;
1526 } else {
1527 UseXmmI2F = false;
1528 }
1529 }
1530 if (FLAG_IS_DEFAULT(UseXmmI2D)) {
1531 if (supports_sse4a()) {
1532 UseXmmI2D = true;
1533 } else {
1534 UseXmmI2D = false;
1535 }
1536 }
1537
1538 // some defaults for AMD family 15h
1539 if (cpu_family() == 0x15) {
1540 // On family 15h processors default is no sw prefetch
1541 if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1542 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1543 }
1544 // Also, if some other prefetch style is specified, default instruction type is PREFETCHW
1545 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1546 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1547 }
1548 // On family 15h processors use XMM and UnalignedLoadStores for Array Copy
1549 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1550 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1551 }
1552 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1553 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1554 }
1555 }
1556
1557 #ifdef COMPILER2
1558 if (cpu_family() < 0x17 && MaxVectorSize > 16) {
1559 // Limit vectors size to 16 bytes on AMD cpus < 17h.
1560 FLAG_SET_DEFAULT(MaxVectorSize, 16);
1561 }
1562 #endif // COMPILER2
1563
1564 // Some defaults for AMD family >= 17h && Hygon family 18h
1565 if (cpu_family() >= 0x17) {
1566 // On family >=17h processors use XMM and UnalignedLoadStores
1567 // for Array Copy
1568 if (supports_sse2() && FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1569 FLAG_SET_DEFAULT(UseXMMForArrayCopy, true);
1570 }
1571 if (supports_sse2() && FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1572 FLAG_SET_DEFAULT(UseUnalignedLoadStores, true);
1573 }
1574 #ifdef COMPILER2
1575 if (supports_sse4_2() && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1576 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1577 }
1578 #endif
1579 }
1580 }
1581
1582 if (is_intel()) { // Intel cpus specific settings
1583 if (FLAG_IS_DEFAULT(UseStoreImmI16)) {
1584 UseStoreImmI16 = false; // don't use it on Intel cpus
1585 }
1586 if (is_intel_server_family() || cpu_family() == 15) {
1587 if (FLAG_IS_DEFAULT(UseAddressNop)) {
1588 // Use it on all Intel cpus starting from PentiumPro
1589 UseAddressNop = true;
1590 }
1591 }
1592 if (FLAG_IS_DEFAULT(UseXmmLoadAndClearUpper)) {
1593 UseXmmLoadAndClearUpper = true; // use movsd on all Intel cpus
1594 }
1595 if (FLAG_IS_DEFAULT(UseXmmRegToRegMoveAll)) {
1596 if (supports_sse3()) {
1597 UseXmmRegToRegMoveAll = true; // use movaps, movapd on new Intel cpus
1598 } else {
1599 UseXmmRegToRegMoveAll = false;
1600 }
1601 }
1602 if (is_intel_server_family() && supports_sse3()) { // New Intel cpus
1603 #ifdef COMPILER2
1604 if (FLAG_IS_DEFAULT(MaxLoopPad)) {
1605 // For new Intel cpus do the next optimization:
1606 // don't align the beginning of a loop if there are enough instructions
1607 // left (NumberOfLoopInstrToAlign defined in c2_globals.hpp)
1608 // in current fetch line (OptoLoopAlignment) or the padding
1609 // is big (> MaxLoopPad).
1610 // Set MaxLoopPad to 11 for new Intel cpus to reduce number of
1611 // generated NOP instructions. 11 is the largest size of one
1612 // address NOP instruction '0F 1F' (see Assembler::nop(i)).
1613 MaxLoopPad = 11;
1614 }
1615 #endif // COMPILER2
1616
1617 if (FLAG_IS_DEFAULT(UseXMMForArrayCopy)) {
1618 UseXMMForArrayCopy = true; // use SSE2 movq on new Intel cpus
1619 }
1620 if ((supports_sse4_2() && supports_ht()) || supports_avx()) { // Newest Intel cpus
1621 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1622 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1623 }
1624 }
1625 }
1626 if (is_atom_family() || is_knights_family()) {
1627 #ifdef COMPILER2
1628 if (FLAG_IS_DEFAULT(OptoScheduling)) {
1629 OptoScheduling = true;
1630 }
1631 #endif
1632 if (supports_sse4_2()) { // Silvermont
1633 if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
1634 UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
1635 }
1636 }
1637 if (FLAG_IS_DEFAULT(UseIncDec)) {
1638 FLAG_SET_DEFAULT(UseIncDec, false);
1639 }
1640 }
1641 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr) && supports_3dnow_prefetch()) {
1642 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1643 }
1644 #ifdef COMPILER2
1645 if (UseAVX > 2) {
1646 if (FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) ||
1647 (!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize) &&
1648 ArrayOperationPartialInlineSize != 0 &&
1649 ArrayOperationPartialInlineSize != 16 &&
1650 ArrayOperationPartialInlineSize != 32 &&
1651 ArrayOperationPartialInlineSize != 64)) {
1652 int inline_size = 0;
1653 if (MaxVectorSize >= 64 && AVX3Threshold == 0) {
1654 inline_size = 64;
1655 } else if (MaxVectorSize >= 32) {
1656 inline_size = 32;
1657 } else if (MaxVectorSize >= 16) {
1658 inline_size = 16;
1659 }
1660 if(!FLAG_IS_DEFAULT(ArrayOperationPartialInlineSize)) {
1661 warning("Setting ArrayOperationPartialInlineSize as %d", inline_size);
1662 }
1663 ArrayOperationPartialInlineSize = inline_size;
1664 }
1665
1666 if (ArrayOperationPartialInlineSize > MaxVectorSize) {
1667 ArrayOperationPartialInlineSize = MaxVectorSize >= 16 ? MaxVectorSize : 0;
1668 if (ArrayOperationPartialInlineSize) {
1669 warning("Setting ArrayOperationPartialInlineSize as MaxVectorSize=%zd", MaxVectorSize);
1670 } else {
1671 warning("Setting ArrayOperationPartialInlineSize as %zd", ArrayOperationPartialInlineSize);
1672 }
1673 }
1674 }
1675 #endif
1676 }
1677
1678 #ifdef COMPILER2
1679 if (FLAG_IS_DEFAULT(OptimizeFill)) {
1680 if (MaxVectorSize < 32 || (!EnableX86ECoreOpts && !VM_Version::supports_avx512vlbw())) {
1681 OptimizeFill = false;
1682 }
1683 }
1684 #endif
1685 if (supports_sse4_2()) {
1686 if (FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1687 FLAG_SET_DEFAULT(UseSSE42Intrinsics, true);
1688 }
1689 } else {
1690 if (UseSSE42Intrinsics && !FLAG_IS_DEFAULT(UseSSE42Intrinsics)) {
1691 warning("SSE4.2 intrinsics require SSE4.2 instructions or higher. Intrinsics will be disabled.");
1692 }
1693 FLAG_SET_DEFAULT(UseSSE42Intrinsics, false);
1694 }
1695 if (UseSSE42Intrinsics) {
1696 if (FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic)) {
1697 UseVectorizedMismatchIntrinsic = true;
1698 }
1699 } else if (UseVectorizedMismatchIntrinsic) {
1700 if (!FLAG_IS_DEFAULT(UseVectorizedMismatchIntrinsic))
1701 warning("vectorizedMismatch intrinsics are not available on this CPU");
1702 FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
1703 }
1704 if (UseAVX >= 2) {
1705 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, true);
1706 } else if (UseVectorizedHashCodeIntrinsic) {
1707 if (!FLAG_IS_DEFAULT(UseVectorizedHashCodeIntrinsic))
1708 warning("vectorizedHashCode intrinsics are not available on this CPU");
1709 FLAG_SET_DEFAULT(UseVectorizedHashCodeIntrinsic, false);
1710 }
1711
1712 // Use count leading zeros instruction if available.
1713 if (supports_lzcnt()) {
1714 if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) {
1715 UseCountLeadingZerosInstruction = true;
1716 }
1717 } else if (UseCountLeadingZerosInstruction) {
1718 warning("lzcnt instruction is not available on this CPU");
1719 FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, false);
1720 }
1721
1722 // Use count trailing zeros instruction if available
1723 if (supports_bmi1()) {
1724 // tzcnt does not require VEX prefix
1725 if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) {
1726 if (!UseBMI1Instructions && !FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1727 // Don't use tzcnt if BMI1 is switched off on command line.
1728 UseCountTrailingZerosInstruction = false;
1729 } else {
1730 UseCountTrailingZerosInstruction = true;
1731 }
1732 }
1733 } else if (UseCountTrailingZerosInstruction) {
1734 warning("tzcnt instruction is not available on this CPU");
1735 FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, false);
1736 }
1737
1738 // BMI instructions (except tzcnt) use an encoding with VEX prefix.
1739 // VEX prefix is generated only when AVX > 0.
1740 if (supports_bmi1() && supports_avx()) {
1741 if (FLAG_IS_DEFAULT(UseBMI1Instructions)) {
1742 UseBMI1Instructions = true;
1743 }
1744 } else if (UseBMI1Instructions) {
1745 warning("BMI1 instructions are not available on this CPU (AVX is also required)");
1746 FLAG_SET_DEFAULT(UseBMI1Instructions, false);
1747 }
1748
1749 if (supports_bmi2() && supports_avx()) {
1750 if (FLAG_IS_DEFAULT(UseBMI2Instructions)) {
1751 UseBMI2Instructions = true;
1752 }
1753 } else if (UseBMI2Instructions) {
1754 warning("BMI2 instructions are not available on this CPU (AVX is also required)");
1755 FLAG_SET_DEFAULT(UseBMI2Instructions, false);
1756 }
1757
1758 // Use population count instruction if available.
1759 if (supports_popcnt()) {
1760 if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
1761 UsePopCountInstruction = true;
1762 }
1763 } else if (UsePopCountInstruction) {
1764 warning("POPCNT instruction is not available on this CPU");
1765 FLAG_SET_DEFAULT(UsePopCountInstruction, false);
1766 }
1767
1768 // Use fast-string operations if available.
1769 if (supports_erms()) {
1770 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1771 UseFastStosb = true;
1772 }
1773 } else if (UseFastStosb) {
1774 warning("fast-string operations are not available on this CPU");
1775 FLAG_SET_DEFAULT(UseFastStosb, false);
1776 }
1777
1778 // For AMD Processors use XMM/YMM MOVDQU instructions
1779 // for Object Initialization as default
1780 if (is_amd() && cpu_family() >= 0x19) {
1781 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1782 UseFastStosb = false;
1783 }
1784 }
1785
1786 #ifdef COMPILER2
1787 if (is_intel() && MaxVectorSize > 16) {
1788 if (FLAG_IS_DEFAULT(UseFastStosb)) {
1789 UseFastStosb = false;
1790 }
1791 }
1792 #endif
1793
1794 // Use XMM/YMM MOVDQU instruction for Object Initialization
1795 if (UseUnalignedLoadStores) {
1796 if (FLAG_IS_DEFAULT(UseXMMForObjInit)) {
1797 UseXMMForObjInit = true;
1798 }
1799 } else if (UseXMMForObjInit) {
1800 warning("UseXMMForObjInit requires SSE2 and unaligned load/stores. Feature is switched off.");
1801 FLAG_SET_DEFAULT(UseXMMForObjInit, false);
1802 }
1803
1804 #ifdef COMPILER2
1805 if (FLAG_IS_DEFAULT(AlignVector)) {
1806 // Modern processors allow misaligned memory operations for vectors.
1807 AlignVector = !UseUnalignedLoadStores;
1808 }
1809 #endif // COMPILER2
1810
1811 if (FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
1812 if (AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch()) {
1813 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
1814 } else if (!supports_sse() && supports_3dnow_prefetch()) {
1815 FLAG_SET_DEFAULT(AllocatePrefetchInstr, 3);
1816 }
1817 }
1818
1819 // Allocation prefetch settings
1820 int cache_line_size = checked_cast<int>(prefetch_data_size());
1821 if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize) &&
1822 (cache_line_size > AllocatePrefetchStepSize)) {
1823 FLAG_SET_DEFAULT(AllocatePrefetchStepSize, cache_line_size);
1824 }
1825
1826 if ((AllocatePrefetchDistance == 0) && (AllocatePrefetchStyle != 0)) {
1827 assert(!FLAG_IS_DEFAULT(AllocatePrefetchDistance), "default value should not be 0");
1828 if (!FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
1829 warning("AllocatePrefetchDistance is set to 0 which disable prefetching. Ignoring AllocatePrefetchStyle flag.");
1830 }
1831 FLAG_SET_DEFAULT(AllocatePrefetchStyle, 0);
1832 }
1833
1834 if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
1835 bool use_watermark_prefetch = (AllocatePrefetchStyle == 2);
1836 FLAG_SET_DEFAULT(AllocatePrefetchDistance, allocate_prefetch_distance(use_watermark_prefetch));
1837 }
1838
1839 if (is_intel() && is_intel_server_family() && supports_sse3()) {
1840 if (FLAG_IS_DEFAULT(AllocatePrefetchLines) &&
1841 supports_sse4_2() && supports_ht()) { // Nehalem based cpus
1842 FLAG_SET_DEFAULT(AllocatePrefetchLines, 4);
1843 }
1844 #ifdef COMPILER2
1845 if (FLAG_IS_DEFAULT(UseFPUForSpilling) && supports_sse4_2()) {
1846 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1847 }
1848 #endif
1849 }
1850
1851 if (is_zx() && ((cpu_family() == 6) || (cpu_family() == 7)) && supports_sse4_2()) {
1852 #ifdef COMPILER2
1853 if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
1854 FLAG_SET_DEFAULT(UseFPUForSpilling, true);
1855 }
1856 #endif
1857 }
1858
1859 // Prefetch settings
1860
1861 // Prefetch interval for gc copy/scan == 9 dcache lines. Derived from
1862 // 50-warehouse specjbb runs on a 2-way 1.8ghz opteron using a 4gb heap.
1863 // Tested intervals from 128 to 2048 in increments of 64 == one cache line.
1864 // 256 bytes (4 dcache lines) was the nearest runner-up to 576.
1865
1866 // gc copy/scan is disabled if prefetchw isn't supported, because
1867 // Prefetch::write emits an inlined prefetchw on Linux.
1868 // Do not use the 3dnow prefetchw instruction. It isn't supported on em64t.
1869 // The used prefetcht0 instruction works for both amd64 and em64t.
1870
1871 if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) {
1872 FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 576);
1873 }
1874 if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) {
1875 FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 576);
1876 }
1877
1878 if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
1879 (cache_line_size > ContendedPaddingWidth))
1880 ContendedPaddingWidth = cache_line_size;
1881
1882 // This machine allows unaligned memory accesses
1883 if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
1884 FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
1885 }
1886
1887 #ifndef PRODUCT
1888 if (log_is_enabled(Info, os, cpu)) {
1889 LogStream ls(Log(os, cpu)::info());
1890 outputStream* log = &ls;
1891 log->print_cr("Logical CPUs per core: %u",
1892 logical_processors_per_package());
1893 log->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
1894 log->print("UseSSE=%d", UseSSE);
1895 if (UseAVX > 0) {
1896 log->print(" UseAVX=%d", UseAVX);
1897 }
1898 if (UseAES) {
1899 log->print(" UseAES=1");
1900 }
1901 #ifdef COMPILER2
1902 if (MaxVectorSize > 0) {
1903 log->print(" MaxVectorSize=%d", (int) MaxVectorSize);
1904 }
1905 #endif
1906 log->cr();
1907 log->print("Allocation");
1908 if (AllocatePrefetchStyle <= 0) {
1909 log->print_cr(": no prefetching");
1910 } else {
1911 log->print(" prefetching: ");
1912 if (AllocatePrefetchInstr == 0) {
1913 log->print("PREFETCHNTA");
1914 } else if (AllocatePrefetchInstr == 1) {
1915 log->print("PREFETCHT0");
1916 } else if (AllocatePrefetchInstr == 2) {
1917 log->print("PREFETCHT2");
1918 } else if (AllocatePrefetchInstr == 3) {
1919 log->print("PREFETCHW");
1920 }
1921 if (AllocatePrefetchLines > 1) {
1922 log->print_cr(" at distance %d, %d lines of %d bytes", AllocatePrefetchDistance, AllocatePrefetchLines, AllocatePrefetchStepSize);
1923 } else {
1924 log->print_cr(" at distance %d, one line of %d bytes", AllocatePrefetchDistance, AllocatePrefetchStepSize);
1925 }
1926 }
1927
1928 if (PrefetchCopyIntervalInBytes > 0) {
1929 log->print_cr("PrefetchCopyIntervalInBytes %d", (int) PrefetchCopyIntervalInBytes);
1930 }
1931 if (PrefetchScanIntervalInBytes > 0) {
1932 log->print_cr("PrefetchScanIntervalInBytes %d", (int) PrefetchScanIntervalInBytes);
1933 }
1934 if (ContendedPaddingWidth > 0) {
1935 log->print_cr("ContendedPaddingWidth %d", (int) ContendedPaddingWidth);
1936 }
1937 }
1938 #endif // !PRODUCT
1939 if (FLAG_IS_DEFAULT(UseSignumIntrinsic)) {
1940 FLAG_SET_DEFAULT(UseSignumIntrinsic, true);
1941 }
1942 if (FLAG_IS_DEFAULT(UseCopySignIntrinsic)) {
1943 FLAG_SET_DEFAULT(UseCopySignIntrinsic, true);
1944 }
1945 }
1946
1947 void VM_Version::print_platform_virtualization_info(outputStream* st) {
1948 VirtualizationType vrt = VM_Version::get_detected_virtualization();
1949 if (vrt == XenHVM) {
1950 st->print_cr("Xen hardware-assisted virtualization detected");
1951 } else if (vrt == KVM) {
1952 st->print_cr("KVM virtualization detected");
1953 } else if (vrt == VMWare) {
1954 st->print_cr("VMWare virtualization detected");
1955 VirtualizationSupport::print_virtualization_info(st);
1956 } else if (vrt == HyperV) {
1957 st->print_cr("Hyper-V virtualization detected");
1958 } else if (vrt == HyperVRole) {
1959 st->print_cr("Hyper-V role detected");
1960 }
1961 }
1962
// Returns true when the current CPU's model/stepping pair appears in Intel's
// published list of parts affected by the Jump Conditional Code (JCC)
// erratum. Relies on the _model and _stepping statics having been filled in
// by cpuid probing before this is called.
bool VM_Version::compute_has_intel_jcc_erratum() {
  if (!is_intel_family_core()) {
    // Only Intel CPUs are affected.
    return false;
  }
  // The following table of affected CPUs is based on the following document released by Intel:
  // https://www.intel.com/content/dam/support/us/en/documents/processors/mitigations-jump-conditional-code-erratum.pdf
  switch (_model) {
  case 0x8E:
    // 06_8EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake U 23e
    // 06_8EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Y
    // 06_8EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake U43e
    // 06_8EH | B | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Amber Lake Y
    // 06_8EH | C | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U42
    // 06_8EH | C | 8th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Whiskey Lake U
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xC;
  case 0x4E:
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake U
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake U23e
    // 06_4E  | 3 | 6th Generation Intel(R) Core(TM) Processors based on microarchitecture code name Skylake Y
    return _stepping == 0x3;
  case 0x55:
    // 06_55H | 4 | Intel(R) Xeon(R) Processor D Family based on microarchitecture code name Skylake D, Bakerville
    // 06_55H | 4 | Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Skylake Server
    // 06_55H | 4 | Intel(R) Xeon(R) Processor W Family based on microarchitecture code name Skylake W
    // 06_55H | 4 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Skylake X
    // 06_55H | 4 | Intel(R) Xeon(R) Processor E3 v5 Family based on microarchitecture code name Skylake Xeon E3
    // 06_55  | 7 | 2nd Generation Intel(R) Xeon(R) Scalable Processors based on microarchitecture code name Cascade Lake (server)
    return _stepping == 0x4 || _stepping == 0x7;
  case 0x5E:
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake H
    // 06_5E  | 3 | 6th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Skylake S
    return _stepping == 0x3;
  case 0x9E:
    // 06_9EH | 9 | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake G
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake H
    // 06_9EH | 9 | 7th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake S
    // 06_9EH | 9 | Intel(R) Core(TM) X-series Processors based on microarchitecture code name Kaby Lake X
    // 06_9EH | 9 | Intel(R) Xeon(R) Processor E3 v6 Family Kaby Lake Xeon E3
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake H
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S
    // 06_9EH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (6+2) x/KBP
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (6+2)
    // 06_9EH | A | Intel(R) Xeon(R) Processor E Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | B | Intel(R) Celeron(R) Processor G Series based on microarchitecture code name Coffee Lake S (4+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecturecode name Coffee Lake H (8+2)
    // 06_9EH | D | 9th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Coffee Lake S (8+2)
    return _stepping == 0x9 || _stepping == 0xA || _stepping == 0xB || _stepping == 0xD;
  case 0xA5:
    // Not in Intel documentation.
    // 06_A5H |    | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake S/H
    return true;
  case 0xA6:
    // 06_A6H | 0  | 10th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Comet Lake U62
    return _stepping == 0x0;
  case 0xAE:
    // 06_AEH | A | 8th Generation Intel(R) Core(TM) Processor Family based on microarchitecture code name Kaby Lake Refresh U (4+2)
    return _stepping == 0xA;
  default:
    // If we are running on another intel machine not recognized in the table, we are okay.
    return false;
  }
}
2030
2031 // On Xen, the cpuid instruction returns
2032 // eax / registers[0]: Version of Xen
2033 // ebx / registers[1]: chars 'XenV'
2034 // ecx / registers[2]: chars 'MMXe'
2035 // edx / registers[3]: chars 'nVMM'
2036 //
2037 // On KVM / VMWare / MS Hyper-V, the cpuid instruction returns
2038 // ebx / registers[1]: chars 'KVMK' / 'VMwa' / 'Micr'
2039 // ecx / registers[2]: chars 'VMKV' / 'reVM' / 'osof'
2040 // edx / registers[3]: chars 'M' / 'ware' / 't Hv'
2041 //
2042 // more information :
2043 // https://kb.vmware.com/s/article/1009458
2044 //
2045 void VM_Version::check_virtualizations() {
2046 uint32_t registers[4] = {0};
2047 char signature[13] = {0};
2048
2049 // Xen cpuid leaves can be found 0x100 aligned boundary starting
2050 // from 0x40000000 until 0x40010000.
2051 // https://lists.linuxfoundation.org/pipermail/virtualization/2012-May/019974.html
2052 for (int leaf = 0x40000000; leaf < 0x40010000; leaf += 0x100) {
2053 detect_virt_stub(leaf, registers);
2054 memcpy(signature, ®isters[1], 12);
2055
2056 if (strncmp("VMwareVMware", signature, 12) == 0) {
2057 Abstract_VM_Version::_detected_virtualization = VMWare;
2058 // check for extended metrics from guestlib
2059 VirtualizationSupport::initialize();
2060 } else if (strncmp("Microsoft Hv", signature, 12) == 0) {
2061 Abstract_VM_Version::_detected_virtualization = HyperV;
2062 #ifdef _WINDOWS
2063 // CPUID leaf 0x40000007 is available to the root partition only.
2064 // See Hypervisor Top Level Functional Specification section 2.4.8 for more details.
2065 // https://github.com/MicrosoftDocs/Virtualization-Documentation/raw/master/tlfs/Hypervisor%20Top%20Level%20Functional%20Specification%20v6.0b.pdf
2066 detect_virt_stub(0x40000007, registers);
2067 if ((registers[0] != 0x0) ||
2068 (registers[1] != 0x0) ||
2069 (registers[2] != 0x0) ||
2070 (registers[3] != 0x0)) {
2071 Abstract_VM_Version::_detected_virtualization = HyperVRole;
2072 }
2073 #endif
2074 } else if (strncmp("KVMKVMKVM", signature, 9) == 0) {
2075 Abstract_VM_Version::_detected_virtualization = KVM;
2076 } else if (strncmp("XenVMMXenVMM", signature, 12) == 0) {
2077 Abstract_VM_Version::_detected_virtualization = XenHVM;
2078 }
2079 }
2080 }
2081
#ifdef COMPILER2
// Determine if it's running on Cascade Lake using default options.
// True only when the user set neither UseAVX nor MaxVectorSize, AVX-512 is
// selected (UseAVX > 2), and the part is a Cascade Lake.
bool VM_Version::is_default_intel_cascade_lake() {
  if (!FLAG_IS_DEFAULT(UseAVX)) {
    return false;
  }
  if (!FLAG_IS_DEFAULT(MaxVectorSize)) {
    return false;
  }
  return (UseAVX > 2) && is_intel_cascade_lake();
}
#endif
2091
2092 bool VM_Version::is_intel_cascade_lake() {
2093 return is_intel_skylake() && _stepping >= 5;
2094 }
2095
2096 bool VM_Version::is_intel_darkmont() {
2097 return is_intel() && is_intel_server_family() && (_model == 0xCC || _model == 0xDD);
2098 }
2099
2100 // avx3_threshold() sets the threshold at which 64-byte instructions are used
2101 // for implementing the array copy and clear operations.
2102 // The Intel platforms that supports the serialize instruction
2103 // has improved implementation of 64-byte load/stores and so the default
2104 // threshold is set to 0 for these platforms.
2105 int VM_Version::avx3_threshold() {
2106 return (is_intel_server_family() &&
2107 supports_serialize() &&
2108 FLAG_IS_DEFAULT(AVX3Threshold)) ? 0 : AVX3Threshold;
2109 }
2110
// Invoke the generated stub that clears the APX test state
// (stub is created in initialize() via g.clear_apx_test_state()).
void VM_Version::clear_apx_test_state() {
  clear_apx_test_state_stub();
}
2114
// Set to true at the end of VM_Version::initialize(); records that CPU
// feature probing and stub generation have completed.
static bool _vm_version_initialized = false;
2116
// One-time VM_Version setup: generate the cpuid/virtualization/APX probe
// stubs, run CPU feature detection, and record any hypervisor.
// The statement order here is significant (see comment below).
void VM_Version::initialize() {
  ResourceMark rm;

  // Making this stub must be FIRST use of assembler
  stub_blob = BufferBlob::create("VM_Version stub", stub_size);
  if (stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate stub for VM_Version");
  }
  CodeBuffer c(stub_blob);
  VM_Version_StubGenerator g(&c);

  // Generate the three raw-machine-code entry points used by this file:
  // cpuid probing, hypervisor-leaf probing, and APX test-state clearing.
  get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t,
                                     g.generate_get_cpu_info());
  detect_virt_stub = CAST_TO_FN_PTR(detect_virt_stub_t,
                                    g.generate_detect_virt());
  clear_apx_test_state_stub = CAST_TO_FN_PTR(clear_apx_test_state_t,
                                             g.clear_apx_test_state());
  // Populate feature flags and derived VM flags from cpuid results.
  get_processor_features();

  Assembler::precompute_instructions();

  // Only probe hypervisor leaves when cpuid reports a hypervisor present.
  if (VM_Version::supports_hv()) { // Supports hypervisor
    check_virtualizations();
  }
  _vm_version_initialized = true;
}
2143
// x86 cpuid family ids (cpuid leaf 1, EAX family field).
typedef enum {
  CPU_FAMILY_8086_8088  = 0,
  CPU_FAMILY_INTEL_286  = 2,
  CPU_FAMILY_INTEL_386  = 3,
  CPU_FAMILY_INTEL_486  = 4,
  CPU_FAMILY_PENTIUM    = 5,
  CPU_FAMILY_PENTIUMPRO = 6,    // Same family several models
  CPU_FAMILY_PENTIUM_4  = 0xF
} FamilyFlag;
2153
// Extended-feature EDX bits (cpuid leaf 0x80000001) used by this file.
typedef enum {
  RDTSCP_FLAG  = 0x08000000, // bit 27
  INTEL64_FLAG = 0x20000000  // bit 29
} _featureExtendedEdxFlag;
2158
// Standard-feature EDX bits (cpuid leaf 1). Reserved bits (10, 20, 30)
// have no enumerator here, hence the gaps in the values.
typedef enum {
  FPU_FLAG     = 0x00000001,
  VME_FLAG     = 0x00000002,
  DE_FLAG      = 0x00000004,
  PSE_FLAG     = 0x00000008,
  TSC_FLAG     = 0x00000010,
  MSR_FLAG     = 0x00000020,
  PAE_FLAG     = 0x00000040,
  MCE_FLAG     = 0x00000080,
  CX8_FLAG     = 0x00000100,
  APIC_FLAG    = 0x00000200,
  SEP_FLAG     = 0x00000800,
  MTRR_FLAG    = 0x00001000,
  PGE_FLAG     = 0x00002000,
  MCA_FLAG     = 0x00004000,
  CMOV_FLAG    = 0x00008000,
  PAT_FLAG     = 0x00010000,
  PSE36_FLAG   = 0x00020000,
  PSNUM_FLAG   = 0x00040000,
  CLFLUSH_FLAG = 0x00080000,
  DTS_FLAG     = 0x00200000,
  ACPI_FLAG    = 0x00400000,
  MMX_FLAG     = 0x00800000,
  FXSR_FLAG    = 0x01000000,
  SSE_FLAG     = 0x02000000,
  SSE2_FLAG    = 0x04000000,
  SS_FLAG      = 0x08000000,
  HTT_FLAG     = 0x10000000,
  TM_FLAG      = 0x20000000
} FeatureEdxFlag;
2189
// Blob and size for the brand-string cpuid stub generated by initialize_tsc().
static BufferBlob* cpuid_brand_string_stub_blob;
static const int   cpuid_brand_string_stub_size = 550;

extern "C" {
  typedef void (*getCPUIDBrandString_stub_t)(void*);
}

// Entry point of the generated brand-string stub; set by initialize_tsc().
static getCPUIDBrandString_stub_t getCPUIDBrandString_stub = nullptr;

// VM_Version statics
enum {
  ExtendedFamilyIdLength_INTEL = 16,
  ExtendedFamilyIdLength_AMD   = 24
};

// 12-char vendor id plus NUL terminator.
const size_t VENDOR_LENGTH = 13;
// Extended brand string: 3 cpuid leaves x 4 registers x 4 bytes, plus NUL.
const size_t CPU_EBS_MAX_LENGTH = (3 * 4 * 4 + 1);
// Lazily-allocated cache for the extended brand string (see cpu_brand_string()).
static char* _cpu_brand_string = nullptr;
static int64_t _max_qualified_cpu_frequency = 0;

// Rough topology estimates filled in by resolve_cpu_information_details().
static int _no_of_threads = 0;
static int _no_of_cores = 0;
2212
// Human-readable Intel family names, indexed by extended family id.
// Empty strings mark ids with no tabulated name.
const char* const _family_id_intel[ExtendedFamilyIdLength_INTEL] = {
  "8086/8088",
  "",
  "286",
  "386",
  "486",
  "Pentium",
  "Pentium Pro",   //or Pentium-M/Woodcrest depending on model
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Pentium 4"
};
2231
// Human-readable AMD family names, indexed by extended family id.
// Empty strings mark ids with no tabulated name.
const char* const _family_id_amd[ExtendedFamilyIdLength_AMD] = {
  "",
  "",
  "",
  "",
  "5x86",
  "K5/K6",
  "Athlon/AthlonXP",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Opteron/Athlon64",
  "Opteron QC/Phenom",  // Barcelona et.al.
  "",
  "",
  "",
  "",
  "",
  "",
  "Zen"
};
// Partially from Intel 64 and IA-32 Architecture Software Developer's Manual,
// September 2013, Vol 3C Table 35-1
// Model names for family 6 (Pentium Pro and successors), indexed by extended
// model id. The array is nullptr-terminated; cpu_model_description() relies
// on that terminator to stop its walk for out-of-range model ids.
const char* const _model_id_pentium_pro[] = {
  "",
  "Pentium Pro",
  "",
  "Pentium II model 3",
  "",
  "Pentium II model 5/Xeon/Celeron",
  "Celeron",
  "Pentium III/Pentium III Xeon",
  "Pentium III/Pentium III Xeon",
  "Pentium M model 9",    // Yonah
  "Pentium III, model A",
  "Pentium III, model B",
  "",
  "Pentium M model D",    // Dothan
  "",
  "Core 2",               // 0xf Woodcrest/Conroe/Merom/Kentsfield/Clovertown
  "",
  "",
  "",
  "",
  "",
  "",
  "Celeron",              // 0x16 Celeron 65nm
  "Core 2",               // 0x17 Penryn / Harpertown
  "",
  "",
  "Core i7",              // 0x1A CPU_MODEL_NEHALEM_EP
  "Atom",                 // 0x1B Z5xx series Silverthorn
  "",
  "Core 2",               // 0x1D Dunnington (6-core)
  "Nehalem",              // 0x1E CPU_MODEL_NEHALEM
  "",
  "",
  "",
  "",
  "",
  "",
  "Westmere",             // 0x25 CPU_MODEL_WESTMERE
  "",
  "",
  "",                     // 0x28
  "",
  "Sandy Bridge",         // 0x2a "2nd Generation Intel Core i7, i5, i3"
  "",
  "Westmere-EP",          // 0x2c CPU_MODEL_WESTMERE_EP
  "Sandy Bridge-EP",      // 0x2d CPU_MODEL_SANDYBRIDGE_EP
  "Nehalem-EX",           // 0x2e CPU_MODEL_NEHALEM_EX
  "Westmere-EX",          // 0x2f CPU_MODEL_WESTMERE_EX
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Ivy Bridge",           // 0x3a
  "",
  "Haswell",              // 0x3c "4th Generation Intel Core Processor"
  "",                     // 0x3d "Next Generation Intel Core Processor"
  "Ivy Bridge-EP",        // 0x3e "Next Generation Intel Xeon Processor E7 Family"
  "",                     // 0x3f "Future Generation Intel Xeon Processor"
  "",
  "",
  "",
  "",
  "",
  "Haswell",              // 0x45 "4th Generation Intel Core Processor"
  "Haswell",              // 0x46 "4th Generation Intel Core Processor"
  nullptr
};
2334
/* Brand ID is for back compatibility
 * Newer CPUs uses the extended brand string */
// Indexed by the brand id in cpuid leaf 1, EBX bits 0..7; nullptr-terminated
// (cpu_brand() stops its walk at the terminator).
const char* const _brand_id[] = {
  "",
  "Celeron processor",
  "Pentium III processor",
  "Intel Pentium III Xeon processor",
  "",
  "",
  "",
  "",
  "Intel Pentium 4 processor",
  nullptr
};
2349
2350
// Names for cpuid leaf 1 EDX feature bits, one entry per bit (bit 0 first).
// Empty strings are reserved/unreported bits and are skipped when printing.
const char* const _feature_edx_id[] = {
  "On-Chip FPU",
  "Virtual Mode Extensions",
  "Debugging Extensions",
  "Page Size Extensions",
  "Time Stamp Counter",
  "Model Specific Registers",
  "Physical Address Extension",
  "Machine Check Exceptions",
  "CMPXCHG8B Instruction",
  "On-Chip APIC",
  "",
  "Fast System Call",
  "Memory Type Range Registers",
  "Page Global Enable",
  "Machine Check Architecture",
  "Conditional Mov Instruction",
  "Page Attribute Table",
  "36-bit Page Size Extension",
  "Processor Serial Number",
  "CLFLUSH Instruction",
  "",
  "Debug Trace Store feature",
  "ACPI registers in MSR space",
  "Intel Architecture MMX Technology",
  "Fast Float Point Save and Restore",
  "Streaming SIMD extensions",
  "Streaming SIMD extensions 2",
  "Self-Snoop",
  "Hyper Threading",
  "Thermal Monitor",
  "",
  "Pending Break Enable"
};
2385
// Names for cpuid leaf 0x80000001 EDX feature bits, one entry per bit.
// Empty strings are reserved/unreported bits and are skipped when printing.
const char* const _feature_extended_edx_id[] = {
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "SYSCALL/SYSRET",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "Execute Disable Bit",
  "",
  "",
  "",
  "",
  "",
  "",
  "RDTSCP",
  "",
  "Intel 64 Architecture",
  "",
  ""
};
2420
// Names for cpuid leaf 1 ECX feature bits, one entry per bit (bit 0 first).
// Empty strings are reserved/unreported bits and are skipped when printing.
const char* const _feature_ecx_id[] = {
  "Streaming SIMD Extensions 3",
  "PCLMULQDQ",
  "64-bit DS Area",
  "MONITOR/MWAIT instructions",
  "CPL Qualified Debug Store",
  "Virtual Machine Extensions",
  "Safer Mode Extensions",
  "Enhanced Intel SpeedStep technology",
  "Thermal Monitor 2",
  "Supplemental Streaming SIMD Extensions 3",
  "L1 Context ID",
  "",
  "Fused Multiply-Add",
  "CMPXCHG16B",
  "xTPR Update Control",
  "Perfmon and Debug Capability",
  "",
  "Process-context identifiers",
  "Direct Cache Access",
  "Streaming SIMD extensions 4.1",
  "Streaming SIMD extensions 4.2",
  "x2APIC",
  "MOVBE",
  "Popcount instruction",
  "TSC-Deadline",
  "AESNI",
  "XSAVE",
  "OSXSAVE",
  "AVX",
  "F16C",
  "RDRAND",
  ""
};
2455
// Names for cpuid leaf 0x80000001 ECX feature bits, one entry per bit.
// Empty strings are reserved/unreported bits and are skipped when printing.
const char* const _feature_extended_ecx_id[] = {
  "LAHF/SAHF instruction support",
  "Core multi-processor legacy mode",
  "",
  "",
  "",
  "Advanced Bit Manipulations: LZCNT",
  "SSE4A: MOVNTSS, MOVNTSD, EXTRQ, INSERTQ",
  "Misaligned SSE mode",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  "",
  ""
};
2490
// Generate the cpuid brand-string stub used later by
// cpu_extended_brand_string(). Exits the VM if the code blob cannot be
// allocated. (NOTE(review): despite the name, this function only sets up the
// brand-string stub — confirm against callers before renaming.)
void VM_Version::initialize_tsc(void) {
  ResourceMark rm;

  cpuid_brand_string_stub_blob = BufferBlob::create("getCPUIDBrandString_stub", cpuid_brand_string_stub_size);
  if (cpuid_brand_string_stub_blob == nullptr) {
    vm_exit_during_initialization("Unable to allocate getCPUIDBrandString_stub");
  }
  CodeBuffer c(cpuid_brand_string_stub_blob);
  VM_Version_StubGenerator g(&c);
  getCPUIDBrandString_stub = CAST_TO_FN_PTR(getCPUIDBrandString_stub_t,
                                            g.generate_getCPUIDBrandString());
}
2503
2504 const char* VM_Version::cpu_model_description(void) {
2505 uint32_t cpu_family = extended_cpu_family();
2506 uint32_t cpu_model = extended_cpu_model();
2507 const char* model = nullptr;
2508
2509 if (cpu_family == CPU_FAMILY_PENTIUMPRO) {
2510 for (uint32_t i = 0; i <= cpu_model; i++) {
2511 model = _model_id_pentium_pro[i];
2512 if (model == nullptr) {
2513 break;
2514 }
2515 }
2516 }
2517 return model;
2518 }
2519
// Return the CPU's extended brand string, lazily fetching and caching it in
// C-heap on first call. Returns nullptr if allocation or the cpuid fetch
// fails (a failed fetch frees the buffer so a later call can retry).
const char* VM_Version::cpu_brand_string(void) {
  if (_cpu_brand_string == nullptr) {
    _cpu_brand_string = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_EBS_MAX_LENGTH, mtInternal);
    if (nullptr == _cpu_brand_string) {
      return nullptr;
    }
    int ret_val = cpu_extended_brand_string(_cpu_brand_string, CPU_EBS_MAX_LENGTH);
    if (ret_val != OS_OK) {
      FREE_C_HEAP_ARRAY(char, _cpu_brand_string);
      _cpu_brand_string = nullptr;
    }
  }
  return _cpu_brand_string;
}
2534
// Return the legacy brand name from the brand id in cpuid leaf 1 EBX[7:0],
// or nullptr when the id is zero or falls past the table's nullptr
// terminator. The loop effectively yields _brand_id[brand_num], stopping
// early (with nullptr) if the terminator is reached first.
const char* VM_Version::cpu_brand(void) {
  const char* brand = nullptr;

  if ((_cpuid_info.std_cpuid1_ebx.value & 0xFF) > 0) {
    int brand_num = _cpuid_info.std_cpuid1_ebx.value & 0xFF;
    brand = _brand_id[0];
    for (int i = 0; brand != nullptr && i <= brand_num; i += 1) {
      brand = _brand_id[i];
    }
  }
  return brand;
}
2547
2548 bool VM_Version::cpu_is_em64t(void) {
2549 return ((_cpuid_info.ext_cpuid1_edx.value & INTEL64_FLAG) == INTEL64_FLAG);
2550 }
2551
2552 bool VM_Version::is_netburst(void) {
2553 return (is_intel() && (extended_cpu_family() == CPU_FAMILY_PENTIUM_4));
2554 }
2555
2556 bool VM_Version::supports_tscinv_ext(void) {
2557 if (!supports_tscinv_bit()) {
2558 return false;
2559 }
2560
2561 if (is_intel()) {
2562 return true;
2563 }
2564
2565 if (is_amd()) {
2566 return !is_amd_Barcelona();
2567 }
2568
2569 if (is_hygon()) {
2570 return true;
2571 }
2572
2573 return false;
2574 }
2575
2576 void VM_Version::resolve_cpu_information_details(void) {
2577
2578 // in future we want to base this information on proper cpu
2579 // and cache topology enumeration such as:
2580 // Intel 64 Architecture Processor Topology Enumeration
2581 // which supports system cpu and cache topology enumeration
2582 // either using 2xAPICIDs or initial APICIDs
2583
2584 // currently only rough cpu information estimates
2585 // which will not necessarily reflect the exact configuration of the system
2586
2587 // this is the number of logical hardware threads
2588 // visible to the operating system
2589 _no_of_threads = os::processor_count();
2590
2591 // find out number of threads per cpu package
2592 int threads_per_package = threads_per_core() * cores_per_cpu();
2593
2594 // use amount of threads visible to the process in order to guess number of sockets
2595 _no_of_sockets = _no_of_threads / threads_per_package;
2596
2597 // process might only see a subset of the total number of threads
2598 // from a single processor package. Virtualization/resource management for example.
2599 // If so then just write a hard 1 as num of pkgs.
2600 if (0 == _no_of_sockets) {
2601 _no_of_sockets = 1;
2602 }
2603
2604 // estimate the number of cores
2605 _no_of_cores = cores_per_cpu() * _no_of_sockets;
2606 }
2607
2608
2609 const char* VM_Version::cpu_family_description(void) {
2610 int cpu_family_id = extended_cpu_family();
2611 if (is_amd()) {
2612 if (cpu_family_id < ExtendedFamilyIdLength_AMD) {
2613 return _family_id_amd[cpu_family_id];
2614 }
2615 }
2616 if (is_intel()) {
2617 if (cpu_family_id == CPU_FAMILY_PENTIUMPRO) {
2618 return cpu_model_description();
2619 }
2620 if (cpu_family_id < ExtendedFamilyIdLength_INTEL) {
2621 return _family_id_intel[cpu_family_id];
2622 }
2623 }
2624 if (is_hygon()) {
2625 return "Dhyana";
2626 }
2627 return "Unknown x86";
2628 }
2629
// Write a one-line CPU type summary (vendor, family, HT and SSE-level
// feature tags, 64-bit tag) into buf. buf must hold at least
// CPU_TYPE_DESC_BUF_SIZE bytes. Always returns OS_OK.
int VM_Version::cpu_type_description(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_TYPE_DESC_BUF_SIZE, "buffer len should at least be == CPU_TYPE_DESC_BUF_SIZE!");

  const char* cpu_type = nullptr;
  const char* x64 = nullptr;

  // Pick vendor label and the vendor-specific 64-bit tag.
  if (is_intel()) {
    cpu_type = "Intel";
    x64 = cpu_is_em64t() ? " Intel64" : "";
  } else if (is_amd()) {
    cpu_type = "AMD";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else if (is_hygon()) {
    cpu_type = "Hygon";
    x64 = cpu_is_em64t() ? " AMD64" : "";
  } else {
    cpu_type = "Unknown x86";
    x64 = cpu_is_em64t() ? " x86_64" : "";
  }

  // Eleven %s conversions below consume the eleven arguments in order;
  // SSE/SSE2 are printed unconditionally.
  jio_snprintf(buf, buf_len, "%s %s%s SSE SSE2%s%s%s%s%s%s%s%s",
               cpu_type,
               cpu_family_description(),
               supports_ht() ? " (HT)" : "",
               supports_sse3() ? " SSE3" : "",
               supports_ssse3() ? " SSSE3" : "",
               supports_sse4_1() ? " SSE4.1" : "",
               supports_sse4_2() ? " SSE4.2" : "",
               supports_sse4a() ? " SSE4A" : "",
               is_netburst() ? " Netburst" : "",
               is_intel_family_core() ? " Core" : "",
               x64);

  return OS_OK;
}
2666
// Fetch the 48-byte extended CPU brand string via the generated cpuid stub
// and copy it into buf (buf_len must be >= CPU_EBS_MAX_LENGTH). The string
// arrives as twelve 32-bit register values stored contiguously; the brand
// string as reported by cpuid is NUL-terminated within those 48 bytes.
// Requires initialize_tsc() to have generated the stub. Returns OS_OK.
int VM_Version::cpu_extended_brand_string(char* const buf, size_t buf_len) {
  assert(buf != nullptr, "buffer is null!");
  assert(buf_len >= CPU_EBS_MAX_LENGTH, "buffer len should at least be == CPU_EBS_MAX_LENGTH!");
  assert(getCPUIDBrandString_stub != nullptr, "not initialized");

  // invoke newly generated asm code to fetch CPU Brand String
  getCPUIDBrandString_stub(&_cpuid_info);

  // fetch results into buffer
  *((uint32_t*) &buf[0])  = _cpuid_info.proc_name_0;
  *((uint32_t*) &buf[4])  = _cpuid_info.proc_name_1;
  *((uint32_t*) &buf[8])  = _cpuid_info.proc_name_2;
  *((uint32_t*) &buf[12]) = _cpuid_info.proc_name_3;
  *((uint32_t*) &buf[16]) = _cpuid_info.proc_name_4;
  *((uint32_t*) &buf[20]) = _cpuid_info.proc_name_5;
  *((uint32_t*) &buf[24]) = _cpuid_info.proc_name_6;
  *((uint32_t*) &buf[28]) = _cpuid_info.proc_name_7;
  *((uint32_t*) &buf[32]) = _cpuid_info.proc_name_8;
  *((uint32_t*) &buf[36]) = _cpuid_info.proc_name_9;
  *((uint32_t*) &buf[40]) = _cpuid_info.proc_name_10;
  *((uint32_t*) &buf[44]) = _cpuid_info.proc_name_11;

  return OS_OK;
}
2691
// Append a comma-separated list of supported CPU feature names (from the
// four feature-name tables above) to buf. Returns the number of bytes
// written, or buf_len - 1 if the buffer filled up mid-write.
size_t VM_Version::cpu_write_support_string(char* const buf, size_t buf_len) {
  guarantee(buf != nullptr, "buffer is null!");
  guarantee(buf_len > 0, "buffer len not enough!");

  unsigned int flag = 0;
  unsigned int fi = 0;
  size_t written = 0;
  const char* prefix = "";

  // Appends `string` (with a ", " separator after the first entry) and
  // advances `written`; bails out with a full-buffer result on snprintf
  // error. Note: contains control flow (return) — only usable inside this
  // function body.
#define WRITE_TO_BUF(string)                                                              \
  {                                                                                       \
    int res = jio_snprintf(&buf[written], buf_len - written, "%s%s", prefix, string);     \
    if (res < 0) {                                                                        \
      return buf_len - 1;                                                                 \
    }                                                                                     \
    written += res;                                                                       \
    if (prefix[0] == '\0') {                                                              \
      prefix = ", ";                                                                      \
    }                                                                                     \
  }

  // Walk cpuid leaf 1 EDX bits 0..29; suppress HTT when the reported logical
  // processor count is <= 1, and SEP on early Pentium Pro steppings
  // (signature below 0x633) where SYSENTER is unreliable per the check.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if (flag == HTT_FLAG && (((_cpuid_info.std_cpuid1_ebx.value >> 16) & 0xff) <= 1)) {
      continue; /* no hyperthreading */
    } else if (flag == SEP_FLAG && (cpu_family() == CPU_FAMILY_PENTIUMPRO && ((_cpuid_info.std_cpuid1_eax.value & 0xff) < 0x33))) {
      continue; /* no fast system call */
    }
    if ((_cpuid_info.std_cpuid1_edx.value & flag) && strlen(_feature_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_edx_id[fi]);
    }
  }

  // cpuid leaf 1 ECX bits.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.std_cpuid1_ecx.value & flag) && strlen(_feature_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_ecx_id[fi]);
    }
  }

  // Extended leaf 0x80000001 ECX bits.
  for (flag = 1, fi = 0; flag <= 0x20000000 ; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_ecx.value & flag) && strlen(_feature_extended_ecx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_ecx_id[fi]);
    }
  }

  // Extended leaf 0x80000001 EDX bits.
  for (flag = 1, fi = 0; flag <= 0x20000000; flag <<= 1, fi++) {
    if ((_cpuid_info.ext_cpuid1_edx.value & flag) && strlen(_feature_extended_edx_id[fi]) > 0) {
      WRITE_TO_BUF(_feature_extended_edx_id[fi]);
    }
  }

  if (supports_tscinv_bit()) {
    WRITE_TO_BUF("Invariant TSC");
  }

  return written;
}
2748
2749 /**
2750 * Write a detailed description of the cpu to a given buffer, including
2751 * feature set.
2752 */
2753 int VM_Version::cpu_detailed_description(char* const buf, size_t buf_len) {
2754 assert(buf != nullptr, "buffer is null!");
2755 assert(buf_len >= CPU_DETAILED_DESC_BUF_SIZE, "buffer len should at least be == CPU_DETAILED_DESC_BUF_SIZE!");
2756
2757 static const char* unknown = "<unknown>";
2758 char vendor_id[VENDOR_LENGTH];
2759 const char* family = nullptr;
2760 const char* model = nullptr;
2761 const char* brand = nullptr;
2762 int outputLen = 0;
2763
2764 family = cpu_family_description();
2765 if (family == nullptr) {
2766 family = unknown;
2767 }
2768
2769 model = cpu_model_description();
2770 if (model == nullptr) {
2771 model = unknown;
2772 }
2773
2774 brand = cpu_brand_string();
2775
2776 if (brand == nullptr) {
2777 brand = cpu_brand();
2778 if (brand == nullptr) {
2779 brand = unknown;
2780 }
2781 }
2782
2783 *((uint32_t*) &vendor_id[0]) = _cpuid_info.std_vendor_name_0;
2784 *((uint32_t*) &vendor_id[4]) = _cpuid_info.std_vendor_name_2;
2785 *((uint32_t*) &vendor_id[8]) = _cpuid_info.std_vendor_name_1;
2786 vendor_id[VENDOR_LENGTH-1] = '\0';
2787
2788 outputLen = jio_snprintf(buf, buf_len, "Brand: %s, Vendor: %s\n"
2789 "Family: %s (0x%x), Model: %s (0x%x), Stepping: 0x%x\n"
2790 "Ext. family: 0x%x, Ext. model: 0x%x, Type: 0x%x, Signature: 0x%8.8x\n"
2791 "Features: ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2792 "Ext. features: eax: 0x%8.8x, ebx: 0x%8.8x, ecx: 0x%8.8x, edx: 0x%8.8x\n"
2793 "Supports: ",
2794 brand,
2795 vendor_id,
2796 family,
2797 extended_cpu_family(),
2798 model,
2799 extended_cpu_model(),
2800 cpu_stepping(),
2801 _cpuid_info.std_cpuid1_eax.bits.ext_family,
2802 _cpuid_info.std_cpuid1_eax.bits.ext_model,
2803 _cpuid_info.std_cpuid1_eax.bits.proc_type,
2804 _cpuid_info.std_cpuid1_eax.value,
2805 _cpuid_info.std_cpuid1_ebx.value,
2806 _cpuid_info.std_cpuid1_ecx.value,
2807 _cpuid_info.std_cpuid1_edx.value,
2808 _cpuid_info.ext_cpuid1_eax,
2809 _cpuid_info.ext_cpuid1_ebx,
2810 _cpuid_info.ext_cpuid1_ecx,
2811 _cpuid_info.ext_cpuid1_edx);
2812
2813 if (outputLen < 0 || (size_t) outputLen >= buf_len - 1) {
2814 if (buf_len > 0) { buf[buf_len-1] = '\0'; }
2815 return OS_ERR;
2816 }
2817
2818 cpu_write_support_string(&buf[outputLen], buf_len - outputLen);
2819
2820 return OS_OK;
2821 }
2822
2823
2824 // Fill in Abstract_VM_Version statics
void VM_Version::initialize_cpu_information() {
  // VM_Version::initialize() must already have run; everything below reads
  // the cpuid state it captured. Must only be called once.
  assert(_vm_version_initialized, "should have initialized VM_Version long ago");
  assert(!_initialized, "shouldn't be initialized yet");
  resolve_cpu_information_details();

  // initialize cpu_name and cpu_desc
  cpu_type_description(_cpu_name, CPU_TYPE_DESC_BUF_SIZE);
  cpu_detailed_description(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE);
  _initialized = true;
}
2835
2836 /**
2837 * For information about extracting the frequency from the cpu brand string, please see:
2838 *
2839 * Intel Processor Identification and the CPUID Instruction
2840 * Application Note 485
2841 * May 2012
2842 *
2843 * The return value is the frequency in Hz.
2844 */
2845 int64_t VM_Version::max_qualified_cpu_freq_from_brand_string(void) {
2846 const char* const brand_string = cpu_brand_string();
2847 if (brand_string == nullptr) {
2848 return 0;
2849 }
2850 const int64_t MEGA = 1000000;
2851 int64_t multiplier = 0;
2852 int64_t frequency = 0;
2853 uint8_t idx = 0;
2854 // The brand string buffer is at most 48 bytes.
2855 // -2 is to prevent buffer overrun when looking for y in yHz, as z is +2 from y.
2856 for (; idx < 48-2; ++idx) {
2857 // Format is either "x.xxyHz" or "xxxxyHz", where y=M, G, T and x are digits.
2858 // Search brand string for "yHz" where y is M, G, or T.
2859 if (brand_string[idx+1] == 'H' && brand_string[idx+2] == 'z') {
2860 if (brand_string[idx] == 'M') {
2861 multiplier = MEGA;
2862 } else if (brand_string[idx] == 'G') {
2863 multiplier = MEGA * 1000;
2864 } else if (brand_string[idx] == 'T') {
2865 multiplier = MEGA * MEGA;
2866 }
2867 break;
2868 }
2869 }
2870 if (multiplier > 0) {
2871 // Compute frequency (in Hz) from brand string.
2872 if (brand_string[idx-3] == '.') { // if format is "x.xx"
2873 frequency = (brand_string[idx-4] - '0') * multiplier;
2874 frequency += (brand_string[idx-2] - '0') * multiplier / 10;
2875 frequency += (brand_string[idx-1] - '0') * multiplier / 100;
2876 } else { // format is "xxxx"
2877 frequency = (brand_string[idx-4] - '0') * 1000;
2878 frequency += (brand_string[idx-3] - '0') * 100;
2879 frequency += (brand_string[idx-2] - '0') * 10;
2880 frequency += (brand_string[idx-1] - '0');
2881 frequency *= multiplier;
2882 }
2883 }
2884 return frequency;
2885 }
2886
2887
2888 int64_t VM_Version::maximum_qualified_cpu_frequency(void) {
2889 if (_max_qualified_cpu_frequency == 0) {
2890 _max_qualified_cpu_frequency = max_qualified_cpu_freq_from_brand_string();
2891 }
2892 return _max_qualified_cpu_frequency;
2893 }
2894
2895 VM_Version::VM_Features VM_Version::CpuidInfo::feature_flags() const {
2896 VM_Features vm_features;
2897 if (std_cpuid1_edx.bits.cmpxchg8 != 0)
2898 vm_features.set_feature(CPU_CX8);
2899 if (std_cpuid1_edx.bits.cmov != 0)
2900 vm_features.set_feature(CPU_CMOV);
2901 if (std_cpuid1_edx.bits.clflush != 0)
2902 vm_features.set_feature(CPU_FLUSH);
2903 // clflush should always be available on x86_64
2904 // if not we are in real trouble because we rely on it
2905 // to flush the code cache.
2906 assert (vm_features.supports_feature(CPU_FLUSH), "clflush should be available");
2907 if (std_cpuid1_edx.bits.fxsr != 0 || (is_amd_family() &&
2908 ext_cpuid1_edx.bits.fxsr != 0))
2909 vm_features.set_feature(CPU_FXSR);
2910 // HT flag is set for multi-core processors also.
2911 if (threads_per_core() > 1)
2912 vm_features.set_feature(CPU_HT);
2913 if (std_cpuid1_edx.bits.mmx != 0 || (is_amd_family() &&
2914 ext_cpuid1_edx.bits.mmx != 0))
2915 vm_features.set_feature(CPU_MMX);
2916 if (std_cpuid1_edx.bits.sse != 0)
2917 vm_features.set_feature(CPU_SSE);
2918 if (std_cpuid1_edx.bits.sse2 != 0)
2919 vm_features.set_feature(CPU_SSE2);
2920 if (std_cpuid1_ecx.bits.sse3 != 0)
2921 vm_features.set_feature(CPU_SSE3);
2922 if (std_cpuid1_ecx.bits.ssse3 != 0)
2923 vm_features.set_feature(CPU_SSSE3);
2924 if (std_cpuid1_ecx.bits.sse4_1 != 0)
2925 vm_features.set_feature(CPU_SSE4_1);
2926 if (std_cpuid1_ecx.bits.sse4_2 != 0)
2927 vm_features.set_feature(CPU_SSE4_2);
2928 if (std_cpuid1_ecx.bits.popcnt != 0)
2929 vm_features.set_feature(CPU_POPCNT);
2930 if (sefsl1_cpuid7_edx.bits.apx_f != 0 &&
2931 xem_xcr0_eax.bits.apx_f != 0 &&
2932 std_cpuid29_ebx.bits.apx_nci_ndd_nf != 0) {
2933 vm_features.set_feature(CPU_APX_F);
2934 }
2935 if (std_cpuid1_ecx.bits.avx != 0 &&
2936 std_cpuid1_ecx.bits.osxsave != 0 &&
2937 xem_xcr0_eax.bits.sse != 0 &&
2938 xem_xcr0_eax.bits.ymm != 0) {
2939 vm_features.set_feature(CPU_AVX);
2940 vm_features.set_feature(CPU_VZEROUPPER);
2941 if (sefsl1_cpuid7_eax.bits.sha512 != 0)
2942 vm_features.set_feature(CPU_SHA512);
2943 if (std_cpuid1_ecx.bits.f16c != 0)
2944 vm_features.set_feature(CPU_F16C);
2945 if (sef_cpuid7_ebx.bits.avx2 != 0) {
2946 vm_features.set_feature(CPU_AVX2);
2947 if (sefsl1_cpuid7_eax.bits.avx_ifma != 0)
2948 vm_features.set_feature(CPU_AVX_IFMA);
2949 }
2950 if (sef_cpuid7_ecx.bits.gfni != 0)
2951 vm_features.set_feature(CPU_GFNI);
2952 if (sef_cpuid7_ebx.bits.avx512f != 0 &&
2953 xem_xcr0_eax.bits.opmask != 0 &&
2954 xem_xcr0_eax.bits.zmm512 != 0 &&
2955 xem_xcr0_eax.bits.zmm32 != 0) {
2956 vm_features.set_feature(CPU_AVX512F);
2957 if (sef_cpuid7_ebx.bits.avx512cd != 0)
2958 vm_features.set_feature(CPU_AVX512CD);
2959 if (sef_cpuid7_ebx.bits.avx512dq != 0)
2960 vm_features.set_feature(CPU_AVX512DQ);
2961 if (sef_cpuid7_ebx.bits.avx512ifma != 0)
2962 vm_features.set_feature(CPU_AVX512_IFMA);
2963 if (sef_cpuid7_ebx.bits.avx512pf != 0)
2964 vm_features.set_feature(CPU_AVX512PF);
2965 if (sef_cpuid7_ebx.bits.avx512er != 0)
2966 vm_features.set_feature(CPU_AVX512ER);
2967 if (sef_cpuid7_ebx.bits.avx512bw != 0)
2968 vm_features.set_feature(CPU_AVX512BW);
2969 if (sef_cpuid7_ebx.bits.avx512vl != 0)
2970 vm_features.set_feature(CPU_AVX512VL);
2971 if (sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
2972 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
2973 if (sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
2974 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
2975 if (sef_cpuid7_ecx.bits.vaes != 0)
2976 vm_features.set_feature(CPU_AVX512_VAES);
2977 if (sef_cpuid7_ecx.bits.avx512_vnni != 0)
2978 vm_features.set_feature(CPU_AVX512_VNNI);
2979 if (sef_cpuid7_ecx.bits.avx512_bitalg != 0)
2980 vm_features.set_feature(CPU_AVX512_BITALG);
2981 if (sef_cpuid7_ecx.bits.avx512_vbmi != 0)
2982 vm_features.set_feature(CPU_AVX512_VBMI);
2983 if (sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
2984 vm_features.set_feature(CPU_AVX512_VBMI2);
2985 }
2986 if (is_intel()) {
2987 if (sefsl1_cpuid7_edx.bits.avx10 != 0 &&
2988 std_cpuid24_ebx.bits.avx10_vlen_512 !=0 &&
2989 std_cpuid24_ebx.bits.avx10_converged_isa_version >= 1 &&
2990 xem_xcr0_eax.bits.opmask != 0 &&
2991 xem_xcr0_eax.bits.zmm512 != 0 &&
2992 xem_xcr0_eax.bits.zmm32 != 0) {
2993 vm_features.set_feature(CPU_AVX10_1);
2994 vm_features.set_feature(CPU_AVX512F);
2995 vm_features.set_feature(CPU_AVX512CD);
2996 vm_features.set_feature(CPU_AVX512DQ);
2997 vm_features.set_feature(CPU_AVX512PF);
2998 vm_features.set_feature(CPU_AVX512ER);
2999 vm_features.set_feature(CPU_AVX512BW);
3000 vm_features.set_feature(CPU_AVX512VL);
3001 vm_features.set_feature(CPU_AVX512_VPOPCNTDQ);
3002 vm_features.set_feature(CPU_AVX512_VPCLMULQDQ);
3003 vm_features.set_feature(CPU_AVX512_VAES);
3004 vm_features.set_feature(CPU_AVX512_VNNI);
3005 vm_features.set_feature(CPU_AVX512_BITALG);
3006 vm_features.set_feature(CPU_AVX512_VBMI);
3007 vm_features.set_feature(CPU_AVX512_VBMI2);
3008 if (std_cpuid24_ebx.bits.avx10_converged_isa_version >= 2) {
3009 vm_features.set_feature(CPU_AVX10_2);
3010 }
3011 }
3012 }
3013 }
3014
3015 if (std_cpuid1_ecx.bits.hv != 0)
3016 vm_features.set_feature(CPU_HV);
3017 if (sef_cpuid7_ebx.bits.bmi1 != 0)
3018 vm_features.set_feature(CPU_BMI1);
3019 if (std_cpuid1_edx.bits.tsc != 0)
3020 vm_features.set_feature(CPU_TSC);
3021 if (ext_cpuid7_edx.bits.tsc_invariance != 0)
3022 vm_features.set_feature(CPU_TSCINV_BIT);
3023 if (std_cpuid1_ecx.bits.aes != 0)
3024 vm_features.set_feature(CPU_AES);
3025 if (ext_cpuid1_ecx.bits.lzcnt != 0)
3026 vm_features.set_feature(CPU_LZCNT);
3027 if (ext_cpuid1_ecx.bits.prefetchw != 0)
3028 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3029 if (sef_cpuid7_ebx.bits.erms != 0)
3030 vm_features.set_feature(CPU_ERMS);
3031 if (sef_cpuid7_edx.bits.fast_short_rep_mov != 0)
3032 vm_features.set_feature(CPU_FSRM);
3033 if (std_cpuid1_ecx.bits.clmul != 0)
3034 vm_features.set_feature(CPU_CLMUL);
3035 if (sef_cpuid7_ebx.bits.rtm != 0)
3036 vm_features.set_feature(CPU_RTM);
3037 if (sef_cpuid7_ebx.bits.adx != 0)
3038 vm_features.set_feature(CPU_ADX);
3039 if (sef_cpuid7_ebx.bits.bmi2 != 0)
3040 vm_features.set_feature(CPU_BMI2);
3041 if (sef_cpuid7_ebx.bits.sha != 0)
3042 vm_features.set_feature(CPU_SHA);
3043 if (std_cpuid1_ecx.bits.fma != 0)
3044 vm_features.set_feature(CPU_FMA);
3045 if (sef_cpuid7_ebx.bits.clflushopt != 0)
3046 vm_features.set_feature(CPU_FLUSHOPT);
3047 if (sef_cpuid7_ebx.bits.clwb != 0)
3048 vm_features.set_feature(CPU_CLWB);
3049 if (ext_cpuid1_edx.bits.rdtscp != 0)
3050 vm_features.set_feature(CPU_RDTSCP);
3051 if (sef_cpuid7_ecx.bits.rdpid != 0)
3052 vm_features.set_feature(CPU_RDPID);
3053
3054 // AMD|Hygon additional features.
3055 if (is_amd_family()) {
3056 // PREFETCHW was checked above, check TDNOW here.
3057 if ((ext_cpuid1_edx.bits.tdnow != 0))
3058 vm_features.set_feature(CPU_3DNOW_PREFETCH);
3059 if (ext_cpuid1_ecx.bits.sse4a != 0)
3060 vm_features.set_feature(CPU_SSE4A);
3061 }
3062
3063 // Intel additional features.
3064 if (is_intel()) {
3065 if (sef_cpuid7_edx.bits.serialize != 0)
3066 vm_features.set_feature(CPU_SERIALIZE);
3067 if (sef_cpuid7_edx.bits.hybrid != 0)
3068 vm_features.set_feature(CPU_HYBRID);
3069 if (_cpuid_info.sef_cpuid7_edx.bits.avx512_fp16 != 0)
3070 vm_features.set_feature(CPU_AVX512_FP16);
3071 }
3072
3073 // ZX additional features.
3074 if (is_zx()) {
3075 // We do not know if these are supported by ZX, so we cannot trust
3076 // common CPUID bit for them.
3077 assert(vm_features.supports_feature(CPU_CLWB), "Check if it is supported?");
3078 vm_features.clear_feature(CPU_CLWB);
3079 }
3080
3081 // Protection key features.
3082 if (sef_cpuid7_ecx.bits.pku != 0) {
3083 vm_features.set_feature(CPU_PKU);
3084 }
3085 if (sef_cpuid7_ecx.bits.ospke != 0) {
3086 vm_features.set_feature(CPU_OSPKE);
3087 }
3088
3089 // Control flow enforcement (CET) features.
3090 if (sef_cpuid7_ecx.bits.cet_ss != 0) {
3091 vm_features.set_feature(CPU_CET_SS);
3092 }
3093 if (sef_cpuid7_edx.bits.cet_ibt != 0) {
3094 vm_features.set_feature(CPU_CET_IBT);
3095 }
3096
3097 // Composite features.
3098 if (supports_tscinv_bit() &&
3099 ((is_amd_family() && !is_amd_Barcelona()) ||
3100 is_intel_tsc_synched_at_init())) {
3101 vm_features.set_feature(CPU_TSCINV);
3102 }
3103 return vm_features;
3104 }
3105
3106 bool VM_Version::os_supports_avx_vectors() {
3107 bool retVal = false;
3108 int nreg = 4;
3109 if (supports_evex()) {
3110 // Verify that OS save/restore all bits of EVEX registers
3111 // during signal processing.
3112 retVal = true;
3113 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3114 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3115 retVal = false;
3116 break;
3117 }
3118 }
3119 } else if (supports_avx()) {
3120 // Verify that OS save/restore all bits of AVX registers
3121 // during signal processing.
3122 retVal = true;
3123 for (int i = 0; i < 8 * nreg; i++) { // 32 bytes per ymm register
3124 if (_cpuid_info.ymm_save[i] != ymm_test_value()) {
3125 retVal = false;
3126 break;
3127 }
3128 }
3129 // zmm_save will be set on a EVEX enabled machine even if we choose AVX code gen
3130 if (retVal == false) {
3131 // Verify that OS save/restore all bits of EVEX registers
3132 // during signal processing.
3133 retVal = true;
3134 for (int i = 0; i < 16 * nreg; i++) { // 64 bytes per zmm register
3135 if (_cpuid_info.zmm_save[i] != ymm_test_value()) {
3136 retVal = false;
3137 break;
3138 }
3139 }
3140 }
3141 }
3142 return retVal;
3143 }
3144
3145 bool VM_Version::os_supports_apx_egprs() {
3146 if (!supports_apx_f()) {
3147 return false;
3148 }
3149 if (_cpuid_info.apx_save[0] != egpr_test_value() ||
3150 _cpuid_info.apx_save[1] != egpr_test_value()) {
3151 return false;
3152 }
3153 return true;
3154 }
3155
3156 uint VM_Version::cores_per_cpu() {
3157 uint result = 1;
3158 if (is_intel()) {
3159 bool supports_topology = supports_processor_topology();
3160 if (supports_topology) {
3161 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3162 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3163 }
3164 if (!supports_topology || result == 0) {
3165 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3166 }
3167 } else if (is_amd_family()) {
3168 result = _cpuid_info.ext_cpuid8_ecx.bits.threads_per_cpu + 1;
3169 if (cpu_family() >= 0x17) { // Zen or later
3170 result /= _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3171 }
3172 } else if (is_zx()) {
3173 bool supports_topology = supports_processor_topology();
3174 if (supports_topology) {
3175 result = _cpuid_info.tpl_cpuidB1_ebx.bits.logical_cpus /
3176 _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3177 }
3178 if (!supports_topology || result == 0) {
3179 result = (_cpuid_info.dcp_cpuid4_eax.bits.cores_per_cpu + 1);
3180 }
3181 }
3182 return result;
3183 }
3184
3185 uint VM_Version::threads_per_core() {
3186 uint result = 1;
3187 if (is_intel() && supports_processor_topology()) {
3188 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3189 } else if (is_zx() && supports_processor_topology()) {
3190 result = _cpuid_info.tpl_cpuidB0_ebx.bits.logical_cpus;
3191 } else if (_cpuid_info.std_cpuid1_edx.bits.ht != 0) {
3192 if (cpu_family() >= 0x17) {
3193 result = _cpuid_info.ext_cpuid1E_ebx.bits.threads_per_core + 1;
3194 } else {
3195 result = _cpuid_info.std_cpuid1_ebx.bits.threads_per_cpu /
3196 cores_per_cpu();
3197 }
3198 }
3199 return (result == 0 ? 1 : result);
3200 }
3201
3202 uint VM_Version::L1_line_size() {
3203 uint result = 0;
3204 if (is_intel()) {
3205 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3206 } else if (is_amd_family()) {
3207 result = _cpuid_info.ext_cpuid5_ecx.bits.L1_line_size;
3208 } else if (is_zx()) {
3209 result = (_cpuid_info.dcp_cpuid4_ebx.bits.L1_line_size + 1);
3210 }
3211 if (result < 32) // not defined ?
3212 result = 32; // 32 bytes by default on x86 and other x64
3213 return result;
3214 }
3215
3216 bool VM_Version::is_intel_tsc_synched_at_init() {
3217 if (is_intel_family_core()) {
3218 uint32_t ext_model = extended_cpu_model();
3219 if (ext_model == CPU_MODEL_NEHALEM_EP ||
3220 ext_model == CPU_MODEL_WESTMERE_EP ||
3221 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
3222 ext_model == CPU_MODEL_IVYBRIDGE_EP) {
3223 // <= 2-socket invariant tsc support. EX versions are usually used
3224 // in > 2-socket systems and likely don't synchronize tscs at
3225 // initialization.
3226 // Code that uses tsc values must be prepared for them to arbitrarily
3227 // jump forward or backward.
3228 return true;
3229 }
3230 }
3231 return false;
3232 }
3233
3234 int VM_Version::allocate_prefetch_distance(bool use_watermark_prefetch) {
3235 // Hardware prefetching (distance/size in bytes):
3236 // Pentium 3 - 64 / 32
3237 // Pentium 4 - 256 / 128
3238 // Athlon - 64 / 32 ????
3239 // Opteron - 128 / 64 only when 2 sequential cache lines accessed
3240 // Core - 128 / 64
3241 //
3242 // Software prefetching (distance in bytes / instruction with best score):
3243 // Pentium 3 - 128 / prefetchnta
3244 // Pentium 4 - 512 / prefetchnta
3245 // Athlon - 128 / prefetchnta
3246 // Opteron - 256 / prefetchnta
3247 // Core - 256 / prefetchnta
3248 // It will be used only when AllocatePrefetchStyle > 0
3249
3250 if (is_amd_family()) { // AMD | Hygon
3251 if (supports_sse2()) {
3252 return 256; // Opteron
3253 } else {
3254 return 128; // Athlon
3255 }
3256 } else { // Intel
3257 if (supports_sse3() && is_intel_server_family()) {
3258 if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
3259 return 192;
3260 } else if (use_watermark_prefetch) { // watermark prefetching on Core
3261 return 384;
3262 }
3263 }
3264 if (supports_sse2()) {
3265 if (is_intel_server_family()) {
3266 return 256; // Pentium M, Core, Core2
3267 } else {
3268 return 512; // Pentium 4
3269 }
3270 } else {
3271 return 128; // Pentium 3 (and all other old CPUs)
3272 }
3273 }
3274 }
3275
3276 bool VM_Version::is_intrinsic_supported(vmIntrinsicID id) {
3277 assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
3278 switch (id) {
3279 case vmIntrinsics::_floatToFloat16:
3280 case vmIntrinsics::_float16ToFloat:
3281 if (!supports_float16()) {
3282 return false;
3283 }
3284 break;
3285 default:
3286 break;
3287 }
3288 return true;
3289 }
3290
3291 void VM_Version::insert_features_names(VM_Version::VM_Features features, stringStream& ss) {
3292 int i = 0;
3293 ss.join([&]() {
3294 while (i < MAX_CPU_FEATURES) {
3295 if (_features.supports_feature((VM_Version::Feature_Flag)i)) {
3296 return _features_names[i++];
3297 }
3298 i += 1;
3299 }
3300 return (const char*)nullptr;
3301 }, ", ");
3302 }