mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-07-27 04:32:51 +02:00
Some formatting and perf fixes.
This commit is contained in:
parent
0290446000
commit
ea650ad7c5
6 changed files with 100 additions and 148 deletions
|
@ -219,18 +219,6 @@ if(NOT PACKAGE_STATIC)
|
|||
endif(APPLE)
|
||||
endif(WIN32)
|
||||
|
||||
if (
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "amd64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "AMD64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "X86_64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "x64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "X64"
|
||||
)
|
||||
message("++ Adding flags for processor ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
add_compile_options(-maes -mrdrnd -mpclmul)
|
||||
endif()
|
||||
|
||||
if (
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR
|
||||
CMAKE_SYSTEM_PROCESSOR MATCHES "arm64" OR
|
||||
|
@ -252,55 +240,6 @@ if(NOT PACKAGE_STATIC)
|
|||
# add_subdirectory(controller)
|
||||
add_subdirectory(osdep)
|
||||
|
||||
#file(GLOB go_src
|
||||
# ${CMAKE_SOURCE_DIR}/cmd/*.go
|
||||
# ${CMAKE_SOURCE_DIR}/cmd/cmd/*.go
|
||||
# ${CMAKE_SOURCE_DIR}/pkg/zerotier/*.go
|
||||
#)
|
||||
#file(GLOB go_zt_service_tests_cmd_src
|
||||
# ${CMAKE_SOURCE_DIR}/cmd/zt_service_tests/*.go
|
||||
#)
|
||||
|
||||
#if(WIN32)
|
||||
# set(GO_EXE_NAME "zerotier.exe")
|
||||
# set(GO_SERVICE_TESTS_EXE_NAME "zt_service_tests.exe")
|
||||
# set(GO_EXTRA_LIBRARIES "-lstdc++ -lwsock32 -lws2_32 -liphlpapi -lole32 -loleaut32 -lrpcrt4 -luuid")
|
||||
#else(WIN32)
|
||||
# set(GO_EXE_NAME "zerotier")
|
||||
# set(GO_SERVICE_TESTS_EXE_NAME "zt_service_tests")
|
||||
# if(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
# set(GO_EXTRA_LIBRARIES "-lstdc++")
|
||||
# if(BUILD_ARM_V5)
|
||||
# set(GO_EXTRA_LIBRARIES
|
||||
# ${GO_EXTRA_LIBRARIES}
|
||||
# "-latomic"
|
||||
# )
|
||||
# endif(BUILD_ARM_V5)
|
||||
# else()
|
||||
# set(GO_EXTRA_LIBRARIES "-lc++" "-lm")
|
||||
# endif()
|
||||
#endif(WIN32)
|
||||
|
||||
#add_custom_target(
|
||||
# zt_service_tests ALL
|
||||
# BYPRODUCTS ${CMAKE_BINARY_DIR}/zt_service_tests
|
||||
# SOURCES ${go_src} ${go_zt_service_tests_cmd_src}
|
||||
# COMMAND ${CMAKE_COMMAND} -E env ${GOARCH} ${GOARM} CGO_ENABLED=1 CGO_CFLAGS=\"-O3\" CGO_LDFLAGS=\"$<TARGET_FILE:zt_core> $<TARGET_FILE:zt_controller> $<TARGET_FILE:zt_service_io_core> $<TARGET_FILE:zt_osdep> ${GO_EXTRA_LIBRARIES}\" ${GO} build -mod=vendor ${GOFLAGS} -o ${CMAKE_BINARY_DIR}/${GO_SERVICE_TESTS_EXE_NAME} ${go_zt_service_tests_cmd_src}
|
||||
# COMMENT "Compiling zt_service_tests (Go/cgo self-tests)..."
|
||||
#)
|
||||
#add_dependencies(zt_service_tests zt_osdep zt_core zt_controller zt_service_io_core)
|
||||
|
||||
#add_custom_target(
|
||||
# zerotier ALL
|
||||
# BYPRODUCTS ${CMAKE_BINARY_DIR}/zerotier
|
||||
# SOURCES ${go_src}
|
||||
# COMMAND ${CMAKE_COMMAND} -E env ${GOARCH} ${GOARM} CGO_ENABLED=1 CGO_CFLAGS=\"-O3\" CGO_LDFLAGS=\"$<TARGET_FILE:zt_core> $<TARGET_FILE:zt_controller> $<TARGET_FILE:zt_service_io_core> $<TARGET_FILE:zt_osdep> ${GO_EXTRA_LIBRARIES}\" ${GO} build -mod=vendor ${GOFLAGS} -o ${CMAKE_BINARY_DIR}/${GO_EXE_NAME} ${CMAKE_SOURCE_DIR}/cmd/zerotier/zerotier.go
|
||||
# COMMENT "Compiling Go Code..."
|
||||
#)
|
||||
#add_dependencies(zerotier zt_osdep zt_core zt_controller zt_service_io_core)
|
||||
|
||||
#install(PROGRAMS ${CMAKE_CURRENT_BINARY_DIR}/zerotier DESTINATION bin)
|
||||
|
||||
else(NOT PACKAGE_STATIC)
|
||||
|
||||
if(BUILD_32BIT)
|
||||
|
|
|
@ -30,7 +30,7 @@ namespace {
|
|||
const __m128i s_sseSwapBytes = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul")))
|
||||
__attribute__((__target__("sse,sse2,pclmul")))
|
||||
#endif
|
||||
__m128i
|
||||
p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
|
||||
|
@ -69,10 +69,13 @@ p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
|
|||
#define ZT_AES_VAES512 1
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("sse4,aes,avx,avx2,vaes,avx512f,avx512bw")))
|
||||
__attribute__((__target__("sse,sse2,sse3,ssse3,sse4,sse4.1,sse4.2,aes,avx,avx2,vaes,avx512f,avx512bw")))
|
||||
#endif
|
||||
void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept
|
||||
void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &_c1, const uint8_t *&_in, uint8_t *&_out, const __m128i *const k) noexcept
|
||||
{
|
||||
uint64_t c1 = _c1;
|
||||
const uint8_t *in = _in;
|
||||
uint8_t *out = _out;
|
||||
const __m512i kk0 = _mm512_broadcast_i32x4(k[0]);
|
||||
const __m512i kk1 = _mm512_broadcast_i32x4(k[1]);
|
||||
const __m512i kk2 = _mm512_broadcast_i32x4(k[2]);
|
||||
|
@ -88,14 +91,12 @@ void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, co
|
|||
const __m512i kk12 = _mm512_broadcast_i32x4(k[12]);
|
||||
const __m512i kk13 = _mm512_broadcast_i32x4(k[13]);
|
||||
const __m512i kk14 = _mm512_broadcast_i32x4(k[14]);
|
||||
do {
|
||||
__m512i p0 = _mm512_loadu_si512(reinterpret_cast<const __m512i *>(in));
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
for (unsigned int i = 0, c = (len >> 6U); i < c; ++i) {
|
||||
__m512i d0 = _mm512_set_epi64(
|
||||
(long long)Utils::hton(c1 + 3ULL), (long long)c0, (long long)Utils::hton(c1 + 2ULL), (long long)c0,
|
||||
(long long)Utils::hton(c1 + 1ULL), (long long)c0, (long long)Utils::hton(c1), (long long)c0);
|
||||
c1 += 4;
|
||||
in += 64;
|
||||
len -= 64;
|
||||
d0 = _mm512_xor_si512(d0, kk0);
|
||||
d0 = _mm512_aesenc_epi128(d0, kk1);
|
||||
d0 = _mm512_aesenc_epi128(d0, kk2);
|
||||
|
@ -111,18 +112,29 @@ void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, co
|
|||
d0 = _mm512_aesenc_epi128(d0, kk12);
|
||||
d0 = _mm512_aesenc_epi128(d0, kk13);
|
||||
d0 = _mm512_aesenclast_epi128(d0, kk14);
|
||||
_mm512_storeu_si512(reinterpret_cast<__m512i *>(out), _mm512_xor_si512(p0, d0));
|
||||
_mm512_storeu_si512(
|
||||
reinterpret_cast<__m512i *>(out),
|
||||
_mm512_xor_si512(_mm512_loadu_si512(reinterpret_cast<const __m512i *>(in)), d0));
|
||||
in += 64;
|
||||
out += 64;
|
||||
} while (likely(len >= 64));
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
}
|
||||
_c1 = c1;
|
||||
_in = in;
|
||||
_out = out;
|
||||
len &= 63U;
|
||||
}
|
||||
|
||||
#define ZT_AES_VAES256 1
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("sse4,aes,avx,avx2,vaes")))
|
||||
__attribute__((__target__("sse,sse2,sse3,ssse3,sse4,sse4.1,sse4.2,aes,avx,avx2,vaes")))
|
||||
#endif
|
||||
void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept
|
||||
void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &_c1, const uint8_t *&_in, uint8_t *&_out, const __m128i *const k) noexcept
|
||||
{
|
||||
uint64_t c1 = _c1;
|
||||
const uint8_t *in = _in;
|
||||
uint8_t *out = _out;
|
||||
const __m256i kk0 = _mm256_broadcastsi128_si256(k[0]);
|
||||
const __m256i kk1 = _mm256_broadcastsi128_si256(k[1]);
|
||||
const __m256i kk2 = _mm256_broadcastsi128_si256(k[2]);
|
||||
|
@ -138,16 +150,13 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co
|
|||
const __m256i kk12 = _mm256_broadcastsi128_si256(k[12]);
|
||||
const __m256i kk13 = _mm256_broadcastsi128_si256(k[13]);
|
||||
const __m256i kk14 = _mm256_broadcastsi128_si256(k[14]);
|
||||
do {
|
||||
__m256i p0 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(in));
|
||||
__m256i p1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(in + 32));
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
for (unsigned int i = 0, c = (len >> 6U); i < c; ++i) {
|
||||
__m256i d0 = _mm256_set_epi64x(
|
||||
(long long)Utils::hton(c1 + 1ULL), (long long)c0, (long long)Utils::hton(c1), (long long)c0);
|
||||
__m256i d1 = _mm256_set_epi64x(
|
||||
(long long)Utils::hton(c1 + 3ULL), (long long)c0, (long long)Utils::hton(c1 + 2ULL), (long long)c0);
|
||||
c1 += 4;
|
||||
in += 64;
|
||||
len -= 64;
|
||||
d0 = _mm256_xor_si256(d0, kk0);
|
||||
d1 = _mm256_xor_si256(d1, kk0);
|
||||
d0 = _mm256_aesenc_epi128(d0, kk1);
|
||||
|
@ -178,16 +187,26 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co
|
|||
d1 = _mm256_aesenc_epi128(d1, kk13);
|
||||
d0 = _mm256_aesenclast_epi128(d0, kk14);
|
||||
d1 = _mm256_aesenclast_epi128(d1, kk14);
|
||||
_mm256_storeu_si256(reinterpret_cast<__m256i *>(out), _mm256_xor_si256(d0, p0));
|
||||
_mm256_storeu_si256(reinterpret_cast<__m256i *>(out + 32), _mm256_xor_si256(d1, p1));
|
||||
_mm256_storeu_si256(
|
||||
reinterpret_cast<__m256i *>(out),
|
||||
_mm256_xor_si256(d0, _mm256_loadu_si256(reinterpret_cast<const __m256i *>(in))));
|
||||
_mm256_storeu_si256(
|
||||
reinterpret_cast<__m256i *>(out + 32),
|
||||
_mm256_xor_si256(d1, _mm256_loadu_si256(reinterpret_cast<const __m256i *>(in + 32))));
|
||||
in += 64;
|
||||
out += 64;
|
||||
} while (likely(len >= 64));
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
}
|
||||
_c1 = c1;
|
||||
_in = in;
|
||||
_out = out;
|
||||
len &= 63U;
|
||||
}
|
||||
|
||||
#endif // does compiler support AVX2 and AVX512 AES intrinsics?
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
|
||||
__attribute__((__target__("sse,sse2")))
|
||||
#endif
|
||||
__m128i
|
||||
p_init256_1_aesni(__m128i a, __m128i b) noexcept
|
||||
|
@ -205,7 +224,7 @@ p_init256_1_aesni(__m128i a, __m128i b) noexcept
|
|||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
|
||||
__attribute__((__target__("sse,sse2,aes")))
|
||||
#endif
|
||||
__m128i
|
||||
p_init256_2_aesni(__m128i a, __m128i b) noexcept
|
||||
|
@ -226,7 +245,7 @@ p_init256_2_aesni(__m128i a, __m128i b) noexcept
|
|||
} // anonymous namespace
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul")))
|
||||
__attribute__((__target__("sse,sse2,pclmul,aes")))
|
||||
#endif
|
||||
void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
|
||||
{
|
||||
|
@ -259,6 +278,7 @@ void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
|
|||
const __m128i hhhh2 = _aes.p_k.ni.h2[3];
|
||||
const uint8_t *const end64 = in + (len & ~((unsigned int)63));
|
||||
len &= 63U;
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
do {
|
||||
__m128i d1 = _mm_shuffle_epi8(_mm_xor_si128(y, _mm_loadu_si128(reinterpret_cast<const __m128i *>(in))), sb);
|
||||
__m128i d2 = _mm_shuffle_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(in + 16)), sb);
|
||||
|
@ -294,6 +314,7 @@ void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
|
|||
_mm_xor_si128(_mm_srli_epi32(a, 1), _mm_srli_si128(c, 4)),
|
||||
_mm_xor_si128(_mm_srli_epi32(a, 2), _mm_srli_epi32(a, 7)))));
|
||||
y = _mm_shuffle_epi8(b, sb);
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
} while (likely(in != end64));
|
||||
}
|
||||
|
||||
|
@ -312,7 +333,7 @@ void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
|
|||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul,aes")))
|
||||
__attribute__((__target__("sse,sse2,pclmul,aes")))
|
||||
#endif
|
||||
void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept
|
||||
{
|
||||
|
@ -385,29 +406,27 @@ void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept
|
|||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes")))
|
||||
__attribute__((__target__("sse,sse2,aes,pclmul")))
|
||||
__attribute__((optimize("unroll-all-loops")))
|
||||
#endif
|
||||
void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) noexcept
|
||||
{
|
||||
const __m128i dd = _mm_set_epi64x(0, (long long)_ctr[0]);
|
||||
uint64_t c1 = Utils::ntoh(_ctr[1]);
|
||||
|
||||
const __m128i *const k = _aes.p_k.ni.k;
|
||||
const __m128i k0 = k[0];
|
||||
const __m128i k1 = k[1];
|
||||
const __m128i k2 = k[2];
|
||||
const __m128i k3 = k[3];
|
||||
const __m128i k4 = k[4];
|
||||
const __m128i k5 = k[5];
|
||||
const __m128i k6 = k[6];
|
||||
const __m128i k7 = k[7];
|
||||
const __m128i k8 = k[8];
|
||||
const __m128i k9 = k[9];
|
||||
const __m128i k10 = k[10];
|
||||
const __m128i k11 = k[11];
|
||||
const __m128i k12 = k[12];
|
||||
const __m128i k13 = k[13];
|
||||
const __m128i k14 = k[14];
|
||||
uint64_t c0 = _ctr[0], c1 = Utils::ntoh(_ctr[1]);
|
||||
const __m128i k0 = _aes.p_k.ni.k[0];
|
||||
const __m128i k1 = _aes.p_k.ni.k[1];
|
||||
const __m128i k2 = _aes.p_k.ni.k[2];
|
||||
const __m128i k3 = _aes.p_k.ni.k[3];
|
||||
const __m128i k4 = _aes.p_k.ni.k[4];
|
||||
const __m128i k5 = _aes.p_k.ni.k[5];
|
||||
const __m128i k6 = _aes.p_k.ni.k[6];
|
||||
const __m128i k7 = _aes.p_k.ni.k[7];
|
||||
const __m128i k8 = _aes.p_k.ni.k[8];
|
||||
const __m128i k9 = _aes.p_k.ni.k[9];
|
||||
const __m128i k10 = _aes.p_k.ni.k[10];
|
||||
const __m128i k11 = _aes.p_k.ni.k[11];
|
||||
const __m128i k12 = _aes.p_k.ni.k[12];
|
||||
const __m128i k13 = _aes.p_k.ni.k[13];
|
||||
const __m128i k14 = _aes.p_k.ni.k[14];
|
||||
|
||||
// Complete any unfinished blocks from previous calls to crypt().
|
||||
unsigned int totalLen = _len;
|
||||
|
@ -421,7 +440,7 @@ void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) n
|
|||
--len;
|
||||
out[totalLen++] = *(in++);
|
||||
if (!(totalLen & 15U)) {
|
||||
__m128i d0 = _mm_insert_epi64(dd, (long long)Utils::hton(c1++), 1);
|
||||
__m128i d0 = _mm_set_epi64x((long long)Utils::hton(c1++), (long long)c0);
|
||||
d0 = _mm_xor_si128(d0, k0);
|
||||
d0 = _mm_aesenc_si128(d0, k1);
|
||||
d0 = _mm_aesenc_si128(d0, k2);
|
||||
|
@ -450,36 +469,31 @@ void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) n
|
|||
|
||||
if (likely(len >= 64)) {
|
||||
#if defined(ZT_AES_VAES512) && defined(ZT_AES_VAES256)
|
||||
if (Utils::CPUID.vaes) {
|
||||
if (Utils::CPUID.avx512f) {
|
||||
p_aesCtrInnerVAES512(len, _ctr[0], c1, in, out, k);
|
||||
}
|
||||
else {
|
||||
p_aesCtrInnerVAES256(len, _ctr[0], c1, in, out, k);
|
||||
}
|
||||
goto skip_conventional_aesni_64;
|
||||
if (Utils::CPUID.avx512f) {
|
||||
p_aesCtrInnerVAES512(len, _ctr[0], c1, in, out, _aes.p_k.ni.k);
|
||||
}
|
||||
else if (Utils::CPUID.vaes) {
|
||||
p_aesCtrInnerVAES256(len, _ctr[0], c1, in, out, _aes.p_k.ni.k);
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
|
||||
#if !defined(ZT_AES_VAES512) && defined(ZT_AES_VAES256)
|
||||
if (Utils::CPUID.vaes && (len >= 256)) {
|
||||
if (Utils::CPUID.vaes) {
|
||||
p_aesCtrInnerVAES256(len, _ctr[0], c1, in, out, k);
|
||||
goto skip_conventional_aesni_64;
|
||||
}
|
||||
else {
|
||||
#endif
|
||||
|
||||
const uint8_t *const eof64 = in + (len & ~((unsigned int)63));
|
||||
len &= 63;
|
||||
__m128i d0, d1, d2, d3;
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
do {
|
||||
const uint64_t c10 = Utils::hton(c1);
|
||||
const uint64_t c11 = Utils::hton(c1 + 1ULL);
|
||||
const uint64_t c12 = Utils::hton(c1 + 2ULL);
|
||||
const uint64_t c13 = Utils::hton(c1 + 3ULL);
|
||||
d0 = _mm_insert_epi64(dd, (long long)c10, 1);
|
||||
d1 = _mm_insert_epi64(dd, (long long)c11, 1);
|
||||
d2 = _mm_insert_epi64(dd, (long long)c12, 1);
|
||||
d3 = _mm_insert_epi64(dd, (long long)c13, 1);
|
||||
__m128i d0 = _mm_set_epi64x((long long)Utils::hton(c1), (long long)c0);
|
||||
__m128i d1 = _mm_set_epi64x((long long)Utils::hton(c1 + 1ULL), (long long)c0);
|
||||
__m128i d2 = _mm_set_epi64x((long long)Utils::hton(c1 + 2ULL), (long long)c0);
|
||||
__m128i d3 = _mm_set_epi64x((long long)Utils::hton(c1 + 3ULL), (long long)c0);
|
||||
c1 += 4;
|
||||
d0 = _mm_xor_si128(d0, k0);
|
||||
d1 = _mm_xor_si128(d1, k0);
|
||||
|
@ -537,25 +551,30 @@ void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) n
|
|||
d1 = _mm_aesenc_si128(d1, k13);
|
||||
d2 = _mm_aesenc_si128(d2, k13);
|
||||
d3 = _mm_aesenc_si128(d3, k13);
|
||||
d0 = _mm_xor_si128(_mm_aesenclast_si128(d0, k14), _mm_loadu_si128(reinterpret_cast<const __m128i *>(in)));
|
||||
d0 = _mm_xor_si128(
|
||||
_mm_aesenclast_si128(d0, k14), _mm_loadu_si128(reinterpret_cast<const __m128i *>(in)));
|
||||
d1 = _mm_xor_si128(
|
||||
_mm_aesenclast_si128(d1, k14), _mm_loadu_si128(reinterpret_cast<const __m128i *>(in + 16)));
|
||||
d2 = _mm_xor_si128(
|
||||
_mm_aesenclast_si128(d2, k14), _mm_loadu_si128(reinterpret_cast<const __m128i *>(in + 32)));
|
||||
d3 = _mm_xor_si128(
|
||||
_mm_aesenclast_si128(d3, k14), _mm_loadu_si128(reinterpret_cast<const __m128i *>(in + 48)));
|
||||
in += 64;
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out), d0);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out + 16), d1);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out + 32), d2);
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(out + 48), d3);
|
||||
in += 64;
|
||||
out += 64;
|
||||
_mm_prefetch(in, _MM_HINT_T0);
|
||||
} while (likely(in != eof64));
|
||||
|
||||
#if defined(ZT_AES_VAES512) || defined(ZT_AES_VAES256)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
skip_conventional_aesni_64:
|
||||
while (len >= 16) {
|
||||
__m128i d0 = _mm_insert_epi64(dd, (long long)Utils::hton(c1++), 1);
|
||||
__m128i d0 = _mm_set_epi64x((long long)Utils::hton(c1++), (long long)c0);
|
||||
d0 = _mm_xor_si128(d0, k0);
|
||||
d0 = _mm_aesenc_si128(d0, k1);
|
||||
d0 = _mm_aesenc_si128(d0, k2);
|
||||
|
@ -588,7 +607,7 @@ skip_conventional_aesni_64:
|
|||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
|
||||
__attribute__((__target__("sse,sse2,aes,pclmul")))
|
||||
#endif
|
||||
void AES::p_init_aesni(const uint8_t *key) noexcept
|
||||
{
|
||||
|
@ -652,7 +671,7 @@ void AES::p_init_aesni(const uint8_t *key) noexcept
|
|||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
|
||||
__attribute__((__target__("sse,sse2,aes")))
|
||||
#endif
|
||||
void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept
|
||||
{
|
||||
|
@ -675,7 +694,7 @@ void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept
|
|||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
|
||||
__attribute__((__target__("sse,sse2,aes")))
|
||||
#endif
|
||||
void AES::p_decrypt_aesni(const void *in, void *out) const noexcept
|
||||
{
|
||||
|
|
|
@ -87,19 +87,9 @@ class Address : public TriviallyCopyable {
|
|||
*/
|
||||
ZT_INLINE char *toString(char s[ZT_ADDRESS_STRING_SIZE_MAX]) const noexcept
|
||||
{
|
||||
const uint64_t a = _a;
|
||||
const unsigned int m = 0xf;
|
||||
s[0] = Utils::HEXCHARS[(unsigned int)(a >> 36U) & m];
|
||||
s[1] = Utils::HEXCHARS[(unsigned int)(a >> 32U) & m];
|
||||
s[2] = Utils::HEXCHARS[(unsigned int)(a >> 28U) & m];
|
||||
s[3] = Utils::HEXCHARS[(unsigned int)(a >> 24U) & m];
|
||||
s[4] = Utils::HEXCHARS[(unsigned int)(a >> 20U) & m];
|
||||
s[5] = Utils::HEXCHARS[(unsigned int)(a >> 16U) & m];
|
||||
s[6] = Utils::HEXCHARS[(unsigned int)(a >> 12U) & m];
|
||||
s[7] = Utils::HEXCHARS[(unsigned int)(a >> 8U) & m];
|
||||
s[8] = Utils::HEXCHARS[(unsigned int)(a >> 4U) & m];
|
||||
s[9] = Utils::HEXCHARS[(unsigned int)a & m];
|
||||
s[10] = 0;
|
||||
for (unsigned int i = 0; i < 10;) {
|
||||
s[i++] = Utils::HEXCHARS[(uintptr_t)(_a >> (36U - (i * 4U))) & 0xfU];
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
|
|
|
@ -76,6 +76,7 @@
|
|||
defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) \
|
||||
|| defined(__AMD64__) || defined(_M_X64))
|
||||
#define ZT_ARCH_X64 1
|
||||
#include <smmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
|
@ -203,7 +204,7 @@
|
|||
#define ZT_INLINE
|
||||
#else
|
||||
#if defined(__GNUC__) || defined(__clang__)
|
||||
#define ZT_INLINE __attribute__((always_inline)) inline
|
||||
#define ZT_INLINE inline __attribute__((always_inline))
|
||||
#else
|
||||
#define ZT_INLINE inline
|
||||
#endif
|
||||
|
|
|
@ -302,6 +302,9 @@ unsigned int unhex(const char *h, unsigned int hlen, void *buf, unsigned int buf
|
|||
#define ZT_GETSECURERANDOM_STATE_SIZE 64
|
||||
#define ZT_GETSECURERANDOM_ITERATIONS_PER_GENERATOR 1048576
|
||||
|
||||
#ifdef __GNUC__
|
||||
__attribute__((__target__("sse,sse2,rdrnd")))
|
||||
#endif
|
||||
void getSecureRandom(void *const buf, unsigned int bytes) noexcept
|
||||
{
|
||||
static Mutex globalLock;
|
||||
|
|
|
@ -17,17 +17,17 @@
|
|||
/**
|
||||
* Major version
|
||||
*/
|
||||
#define ZEROTIER_VERSION_MAJOR @ZEROTIER_VERSION_MAJOR @
|
||||
#define ZEROTIER_VERSION_MAJOR @ZEROTIER_VERSION_MAJOR@
|
||||
|
||||
/**
|
||||
* Minor version
|
||||
*/
|
||||
#define ZEROTIER_VERSION_MINOR @ZEROTIER_VERSION_MINOR @
|
||||
#define ZEROTIER_VERSION_MINOR @ZEROTIER_VERSION_MINOR@
|
||||
|
||||
/**
|
||||
* Revision
|
||||
*/
|
||||
#define ZEROTIER_VERSION_REVISION @ZEROTIER_VERSION_REVISION @
|
||||
#define ZEROTIER_VERSION_REVISION @ZEROTIER_VERSION_REVISION@
|
||||
|
||||
/**
|
||||
* Build version
|
||||
|
@ -36,6 +36,6 @@
|
|||
* to force a minor update without an actual version number change. It's
|
||||
* not part of the actual release version number.
|
||||
*/
|
||||
#define ZEROTIER_VERSION_BUILD @ZEROTIER_VERSION_BUILD @
|
||||
#define ZEROTIER_VERSION_BUILD @ZEROTIER_VERSION_BUILD@
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue