Do not unconditionally enable SSE4 on x64, because old Atom chips do not support it. Instead, enable it only for the AES-NI code paths; these run only when AES-NI is present, which is never the case on those old chips.

This commit is contained in:
Adam Ierymenko 2020-10-13 16:08:30 -04:00
parent ee04118172
commit 2c75be0d64
No known key found for this signature in database
GPG key ID: C8877CF2D7A5D7F3
2 changed files with 9 additions and 4 deletions

View file

@ -119,15 +119,15 @@ ifeq ($(CC_MACH),x86_64)
ZT_ARCHITECTURE=2
ZT_USE_X64_ASM_SALSA=1
ZT_USE_X64_ASM_ED25519=1
override CFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
override CXXFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
override CFLAGS+=-msse -msse2 -maes -mpclmul
override CXXFLAGS+=-msse -msse2 -maes -mpclmul
endif
ifeq ($(CC_MACH),amd64)
ZT_ARCHITECTURE=2
ZT_USE_X64_ASM_SALSA=1
ZT_USE_X64_ASM_ED25519=1
override CFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
override CXXFLAGS+=-msse -msse2 -mssse3 -msse4 -msse4.1 -msse4.2 -maes -mpclmul
override CFLAGS+=-msse -msse2 -maes -mpclmul
override CXXFLAGS+=-msse -msse2 -maes -mpclmul
endif
ifeq ($(CC_MACH),powerpc64le)
ZT_ARCHITECTURE=8

View file

@ -146,6 +146,7 @@ void s_gfmul(const uint64_t hh, const uint64_t hl, uint64_t &y0, uint64_t &y1) n
// SSE shuffle parameter to reverse bytes in a 128-bit vector.
static const __m128i s_sseSwapBytes = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
static __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
{
y = _mm_shuffle_epi8(y, s_sseSwapBytes);
@ -169,6 +170,7 @@ static __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
#endif
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
void AES::GMAC::update(const void *const data, unsigned int len) noexcept
{
const uint8_t *in = reinterpret_cast<const uint8_t *>(data);
@ -322,6 +324,7 @@ void AES::GMAC::update(const void *const data, unsigned int len) noexcept
_rp = len; // len is always less than 16 here
}
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
void AES::GMAC::finish(uint8_t tag[16]) noexcept
{
#ifdef ZT_AES_AESNI
@ -593,6 +596,7 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co
#endif // ZT_AES_AESNI
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
void AES::CTR::crypt(const void *const input, unsigned int len) noexcept
{
const uint8_t *in = reinterpret_cast<const uint8_t *>(input);
@ -1473,6 +1477,7 @@ static __m128i _init256_2_aesni(__m128i a, __m128i b) noexcept
return x;
}
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2")))
void AES::_init_aesni(const uint8_t key[32]) noexcept
{
__m128i t1, t2, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13;