diff --git a/ext/installfiles/windows/ZeroTier One.aip b/ext/installfiles/windows/ZeroTier One.aip index a5b4f4158..955248c4b 100644 --- a/ext/installfiles/windows/ZeroTier One.aip +++ b/ext/installfiles/windows/ZeroTier One.aip @@ -1,5 +1,5 @@ - + @@ -25,10 +25,10 @@ - + - + @@ -58,7 +58,7 @@ - + @@ -131,7 +131,7 @@ - + @@ -465,28 +465,28 @@ - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + diff --git a/node/AES_aesni.cpp b/node/AES_aesni.cpp index 6fe705ecf..a185b1b36 100644 --- a/node/AES_aesni.cpp +++ b/node/AES_aesni.cpp @@ -26,7 +26,9 @@ namespace { const __m128i s_sseSwapBytes = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul"))) +#endif __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept { y = _mm_shuffle_epi8(y, s_sseSwapBytes); @@ -57,7 +59,9 @@ __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept #define ZT_AES_VAES512 1 +#ifdef __GNUC__ __attribute__((__target__("sse4,aes,avx,avx2,vaes,avx512f,avx512bw"))) +#endif void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept { const __m512i kk0 = _mm512_broadcast_i32x4(k[0]); @@ -107,7 +111,9 @@ void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, co #define ZT_AES_VAES256 1 +#ifdef __GNUC__ __attribute__((__target__("sse4,aes,avx,avx2,vaes"))) +#endif void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept { const __m256i kk0 = _mm256_broadcastsi128_si256(k[0]); @@ -175,7 +181,9 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co #endif // does compiler support AVX2 and AVX512 AES intrinsics? +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul"))) +#endif __m128i p_init256_1_aesni(__m128i a, __m128i b) noexcept { __m128i x, y; @@ -190,7 +198,9 @@ __m128i p_init256_1_aesni(__m128i a, __m128i b) noexcept return x; } +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul"))) +#endif __m128i p_init256_2_aesni(__m128i a, __m128i b) noexcept { __m128i x, y, z; @@ -208,7 +218,9 @@ __m128i p_init256_2_aesni(__m128i a, __m128i b) noexcept } // anonymous namespace +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul"))) +#endif void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept { __m128i y = _mm_loadu_si128(reinterpret_cast(_y)); @@ -274,7 +286,9 @@ void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept _rp = len; // len is always less than 16 here } +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul,aes"))) +#endif void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept { __m128i y = _mm_loadu_si128(reinterpret_cast(_y)); @@ -345,7 +359,9 @@ void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept _mm_storeu_si128(reinterpret_cast<__m128i *>(tag), _mm_xor_si128(_mm_shuffle_epi8(t4, s_sseSwapBytes), encIV)); } +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes"))) +#endif void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) noexcept { const __m128i dd = _mm_set_epi64x(0, (long long)_ctr[0]); @@ -542,7 +558,9 @@ void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) n _ctr[1] = Utils::hton(c1); } +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul"))) +#endif void AES::p_init_aesni(const uint8_t *key) noexcept { __m128i t1, t2, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13; @@ -604,7 +622,9 @@ void AES::p_init_aesni(const uint8_t *key) noexcept p_k.ni.h2[3] = _mm_xor_si128(_mm_shuffle_epi32(hhhh, 78), hhhh); } +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul"))) +#endif void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept { __m128i tmp = _mm_loadu_si128((const __m128i *)in); @@ -625,7 +645,9 @@ void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept _mm_storeu_si128((__m128i *)out, _mm_aesenclast_si128(tmp, p_k.ni.k[14])); } +#ifdef __GNUC__ __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul"))) +#endif void AES::p_decrypt_aesni(const void *in, void *out) const noexcept { __m128i tmp = _mm_loadu_si128((const __m128i *)in); diff --git a/windows/ZeroTierOne/ZeroTierOne.vcxproj b/windows/ZeroTierOne/ZeroTierOne.vcxproj index 75dcad6ce..9b2b996ab 100644 --- a/windows/ZeroTierOne/ZeroTierOne.vcxproj +++ b/windows/ZeroTierOne/ZeroTierOne.vcxproj @@ -51,6 +51,8 @@ + + @@ -443,7 +445,7 @@ ZT_EXPORT;FD_SETSIZE=1024;STATICLIB;ZT_SOFTWARE_UPDATE_DEFAULT="apply";ZT_SALSA20_SSE;ZT_USE_MINIUPNPC;MINIUPNP_STATICLIB;WIN32;NOMINMAX;ZT_BUILD_PLATFORM=2;ZT_BUILD_ARCHITECTURE=2;%(PreprocessorDefinitions) MultiThreaded - NotSet + StreamingSIMDExtensions2 true AnySuitable Speed diff --git a/windows/ZeroTierOne/ZeroTierOne.vcxproj.filters b/windows/ZeroTierOne/ZeroTierOne.vcxproj.filters index f474c19b5..cca0c5ce3 100644 --- a/windows/ZeroTierOne/ZeroTierOne.vcxproj.filters +++ b/windows/ZeroTierOne/ZeroTierOne.vcxproj.filters @@ -285,6 +285,12 @@ Source Files\node + + Source Files\node + + + Source Files\node +