344 decl(AVX512CD, "avx512cd", 31) \
345 \
346 decl(AVX512BW, "avx512bw", 32) /* Byte and word vector instructions */ \
347 decl(AVX512VL, "avx512vl", 33) /* EVEX instructions with smaller vector length */ \
348 decl(SHA, "sha", 34) /* SHA instructions */ \
349 decl(FMA, "fma", 35) /* FMA instructions */ \
350 \
351 decl(VZEROUPPER, "vzeroupper", 36) /* Vzeroupper instruction */ \
352 decl(AVX512_VPOPCNTDQ, "avx512_vpopcntdq", 37) /* Vector popcount */ \
353 decl(AVX512_VPCLMULQDQ, "avx512_vpclmulqdq", 38) /* Vector carryless multiplication */ \
354 decl(AVX512_VAES, "avx512_vaes", 39) /* Vector AES instruction */ \
355 \
356 decl(AVX512_VNNI, "avx512_vnni", 40) /* Vector Neural Network Instructions */ \
357 decl(FLUSH, "clflush", 41) /* flush instruction */ \
358 decl(FLUSHOPT, "clflushopt", 42) /* flushopt instruction */ \
359 decl(CLWB, "clwb", 43) /* clwb instruction */ \
360 \
361 decl(AVX512_VBMI2, "avx512_vbmi2", 44) /* VBMI2 shift left double instructions */ \
362 decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
363 decl(HV, "hv", 46) /* Hypervisor instructions */ \
364 decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */
365
366 #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
367 CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
368 #undef DECLARE_CPU_FEATURE_FLAG
369 };
370
371 static const char* _features_names[];
372
373 enum Extended_Family {
374 // AMD
375 CPU_FAMILY_AMD_11H = 0x11,
376 // ZX
377 CPU_FAMILY_ZX_CORE_F6 = 6,
378 CPU_FAMILY_ZX_CORE_F7 = 7,
379 // Intel
380 CPU_FAMILY_INTEL_CORE = 6,
381 CPU_MODEL_NEHALEM = 0x1e,
382 CPU_MODEL_NEHALEM_EP = 0x1a,
383 CPU_MODEL_NEHALEM_EX = 0x2e,
384 CPU_MODEL_WESTMERE = 0x25,
574 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
575 result |= CPU_AVX512F;
576 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
577 result |= CPU_AVX512CD;
578 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
579 result |= CPU_AVX512DQ;
580 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
581 result |= CPU_AVX512PF;
582 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
583 result |= CPU_AVX512ER;
584 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
585 result |= CPU_AVX512BW;
586 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
587 result |= CPU_AVX512VL;
588 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
589 result |= CPU_AVX512_VPOPCNTDQ;
590 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
591 result |= CPU_AVX512_VPCLMULQDQ;
592 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
593 result |= CPU_AVX512_VAES;
594 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
595 result |= CPU_AVX512_VNNI;
596 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
597 result |= CPU_AVX512_VBMI;
598 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
599 result |= CPU_AVX512_VBMI2;
600 }
601 }
602 if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
603 result |= CPU_HV;
604 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
605 result |= CPU_BMI1;
606 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
607 result |= CPU_TSC;
608 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
609 result |= CPU_TSCINV_BIT;
610 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
611 result |= CPU_AES;
612 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
613 result |= CPU_ERMS;
614 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
615 result |= CPU_CLMUL;
// Single-bit AVX-512 feature queries. Each accessor masks _features with one
// CPU_* flag and returns true when that bit is set.
880 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
881 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
882 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
883 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
884 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
// Compound predicates. Each one combines supports_evex(), supports_avx2(),
// supports_avx() and supports_avx512dq() (all defined outside this excerpt)
// with the single-bit accessors; none of them reads _features directly.
//
// The first four return true only when EVEX is supported AND every extension
// named in the suffix (vl, bw, dq) is supported.
885 static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
886 static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); }
887 static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
888 static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
889 supports_avx512bw() && supports_avx512dq()); }
// Negative forms: EVEX is supported but the named extension (vl / bw) is not.
890 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
891 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
// True when AVX2 is supported and EVEX is not.
892 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
// True when AVX2 or AVX is supported and EVEX is not.
893 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
// Feature queries backed by a single CPU_* bit in _features. Each returns
// true when the corresponding bit is set.
894 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
// FMA is reported only when the FMA bit is set AND supports_avx() is true.
895 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
896 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
897 static bool supports_avx512_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
898 static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
899 static bool supports_avx512_vaes() { return (_features & CPU_AVX512_VAES) != 0; }
900 static bool supports_avx512_vnni() { return (_features & CPU_AVX512_VNNI) != 0; }
901 static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
902 static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
903 static bool supports_hv() { return (_features & CPU_HV) != 0; }
904 static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
905
906 // Intel features
// Returns true when is_intel() holds and extended_cpu_family() equals
// CPU_FAMILY_INTEL_CORE. Both helpers are defined outside this excerpt.
907 static bool is_intel_family_core() { return is_intel() &&
908 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
909
// Returns true when is_intel_family_core() holds and extended_cpu_model()
// equals CPU_MODEL_SKYLAKE (the constant is declared outside this excerpt).
910 static bool is_intel_skylake() { return is_intel_family_core() &&
911 extended_cpu_model() == CPU_MODEL_SKYLAKE; }
912
913 static int avx3_threshold();
914
915 static bool is_intel_tsc_synched_at_init() {
916 if (is_intel_family_core()) {
917 uint32_t ext_model = extended_cpu_model();
918 if (ext_model == CPU_MODEL_NEHALEM_EP ||
919 ext_model == CPU_MODEL_WESTMERE_EP ||
920 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
|
344 decl(AVX512CD, "avx512cd", 31) \
345 \
346 decl(AVX512BW, "avx512bw", 32) /* Byte and word vector instructions */ \
347 decl(AVX512VL, "avx512vl", 33) /* EVEX instructions with smaller vector length */ \
348 decl(SHA, "sha", 34) /* SHA instructions */ \
349 decl(FMA, "fma", 35) /* FMA instructions */ \
350 \
351 decl(VZEROUPPER, "vzeroupper", 36) /* Vzeroupper instruction */ \
352 decl(AVX512_VPOPCNTDQ, "avx512_vpopcntdq", 37) /* Vector popcount */ \
353 decl(AVX512_VPCLMULQDQ, "avx512_vpclmulqdq", 38) /* Vector carryless multiplication */ \
354 decl(AVX512_VAES, "avx512_vaes", 39) /* Vector AES instruction */ \
355 \
356 decl(AVX512_VNNI, "avx512_vnni", 40) /* Vector Neural Network Instructions */ \
357 decl(FLUSH, "clflush", 41) /* flush instruction */ \
358 decl(FLUSHOPT, "clflushopt", 42) /* flushopt instruction */ \
359 decl(CLWB, "clwb", 43) /* clwb instruction */ \
360 \
361 decl(AVX512_VBMI2, "avx512_vbmi2", 44) /* VBMI2 shift left double instructions */ \
362 decl(AVX512_VBMI, "avx512_vbmi", 45) /* Vector BMI instructions */ \
363 decl(HV, "hv", 46) /* Hypervisor instructions */ \
364 decl(SERIALIZE, "serialize", 47) /* CPU SERIALIZE */ \
365 decl(GFNI, "gfni", 48) /* Vector GFNI instructions */ \
366 decl(AVX512_BITALG, "avx512_bitalg", 49) /* Vector sub-word popcount and bit gather instructions */
367
368 #define DECLARE_CPU_FEATURE_FLAG(id, name, bit) CPU_##id = (1ULL << bit),
369 CPU_FEATURE_FLAGS(DECLARE_CPU_FEATURE_FLAG)
370 #undef DECLARE_CPU_FEATURE_FLAG
371 };
372
373 static const char* _features_names[];
374
375 enum Extended_Family {
376 // AMD
377 CPU_FAMILY_AMD_11H = 0x11,
378 // ZX
379 CPU_FAMILY_ZX_CORE_F6 = 6,
380 CPU_FAMILY_ZX_CORE_F7 = 7,
381 // Intel
382 CPU_FAMILY_INTEL_CORE = 6,
383 CPU_MODEL_NEHALEM = 0x1e,
384 CPU_MODEL_NEHALEM_EP = 0x1a,
385 CPU_MODEL_NEHALEM_EX = 0x2e,
386 CPU_MODEL_WESTMERE = 0x25,
576 _cpuid_info.xem_xcr0_eax.bits.zmm32 != 0) {
577 result |= CPU_AVX512F;
578 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512cd != 0)
579 result |= CPU_AVX512CD;
580 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512dq != 0)
581 result |= CPU_AVX512DQ;
582 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512pf != 0)
583 result |= CPU_AVX512PF;
584 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512er != 0)
585 result |= CPU_AVX512ER;
586 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512bw != 0)
587 result |= CPU_AVX512BW;
588 if (_cpuid_info.sef_cpuid7_ebx.bits.avx512vl != 0)
589 result |= CPU_AVX512VL;
590 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpopcntdq != 0)
591 result |= CPU_AVX512_VPOPCNTDQ;
592 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vpclmulqdq != 0)
593 result |= CPU_AVX512_VPCLMULQDQ;
594 if (_cpuid_info.sef_cpuid7_ecx.bits.vaes != 0)
595 result |= CPU_AVX512_VAES;
596 if (_cpuid_info.sef_cpuid7_ecx.bits.gfni != 0)
597 result |= CPU_GFNI;
598 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vnni != 0)
599 result |= CPU_AVX512_VNNI;
600 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_bitalg != 0)
601 result |= CPU_AVX512_BITALG;
602 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi != 0)
603 result |= CPU_AVX512_VBMI;
604 if (_cpuid_info.sef_cpuid7_ecx.bits.avx512_vbmi2 != 0)
605 result |= CPU_AVX512_VBMI2;
606 }
607 }
608 if (_cpuid_info.std_cpuid1_ecx.bits.hv != 0)
609 result |= CPU_HV;
610 if (_cpuid_info.sef_cpuid7_ebx.bits.bmi1 != 0)
611 result |= CPU_BMI1;
612 if (_cpuid_info.std_cpuid1_edx.bits.tsc != 0)
613 result |= CPU_TSC;
614 if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
615 result |= CPU_TSCINV_BIT;
616 if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
617 result |= CPU_AES;
618 if (_cpuid_info.sef_cpuid7_ebx.bits.erms != 0)
619 result |= CPU_ERMS;
620 if (_cpuid_info.std_cpuid1_ecx.bits.clmul != 0)
621 result |= CPU_CLMUL;
// Single-bit AVX-512 feature queries. Each accessor masks _features with one
// CPU_* flag and returns true when that bit is set.
886 static bool supports_avx512pf() { return (_features & CPU_AVX512PF) != 0; }
887 static bool supports_avx512er() { return (_features & CPU_AVX512ER) != 0; }
888 static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
889 static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
890 static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
// Compound predicates. Each one combines supports_evex(), supports_avx2(),
// supports_avx() and supports_avx512dq() (all defined outside this excerpt)
// with the single-bit accessors; none of them reads _features directly.
//
// The first four return true only when EVEX is supported AND every extension
// named in the suffix (vl, bw, dq) is supported.
891 static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
892 static bool supports_avx512bwdq() { return (supports_evex() && supports_avx512bw() && supports_avx512dq()); }
893 static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
894 static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
895 supports_avx512bw() && supports_avx512dq()); }
// Negative forms: EVEX is supported but the named extension (vl / bw) is not.
896 static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
897 static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
// True when AVX2 is supported and EVEX is not.
898 static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
// True when AVX2 or AVX is supported and EVEX is not.
899 static bool supports_avxonly() { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
// Feature queries backed by a single CPU_* bit in _features. Each returns
// true when the corresponding bit is set.
900 static bool supports_sha() { return (_features & CPU_SHA) != 0; }
// FMA is reported only when the FMA bit is set AND supports_avx() is true.
901 static bool supports_fma() { return (_features & CPU_FMA) != 0 && supports_avx(); }
902 static bool supports_vzeroupper() { return (_features & CPU_VZEROUPPER) != 0; }
903 static bool supports_avx512_vpopcntdq() { return (_features & CPU_AVX512_VPOPCNTDQ) != 0; }
904 static bool supports_avx512_vpclmulqdq() { return (_features & CPU_AVX512_VPCLMULQDQ) != 0; }
905 static bool supports_avx512_vaes() { return (_features & CPU_AVX512_VAES) != 0; }
906 static bool supports_gfni() { return (_features & CPU_GFNI) != 0; }
907 static bool supports_avx512_vnni() { return (_features & CPU_AVX512_VNNI) != 0; }
908 static bool supports_avx512_bitalg() { return (_features & CPU_AVX512_BITALG) != 0; }
909 static bool supports_avx512_vbmi() { return (_features & CPU_AVX512_VBMI) != 0; }
910 static bool supports_avx512_vbmi2() { return (_features & CPU_AVX512_VBMI2) != 0; }
911 static bool supports_hv() { return (_features & CPU_HV) != 0; }
912 static bool supports_serialize() { return (_features & CPU_SERIALIZE) != 0; }
913
914 // Intel features
// Returns true when is_intel() holds and extended_cpu_family() equals
// CPU_FAMILY_INTEL_CORE. Both helpers are defined outside this excerpt.
915 static bool is_intel_family_core() { return is_intel() &&
916 extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
917
// Returns true when is_intel_family_core() holds and extended_cpu_model()
// equals CPU_MODEL_SKYLAKE (the constant is declared outside this excerpt).
918 static bool is_intel_skylake() { return is_intel_family_core() &&
919 extended_cpu_model() == CPU_MODEL_SKYLAKE; }
920
921 static int avx3_threshold();
922
923 static bool is_intel_tsc_synched_at_init() {
924 if (is_intel_family_core()) {
925 uint32_t ext_model = extended_cpu_model();
926 if (ext_model == CPU_MODEL_NEHALEM_EP ||
927 ext_model == CPU_MODEL_WESTMERE_EP ||
928 ext_model == CPU_MODEL_SANDYBRIDGE_EP ||
|