diff --git a/include/ZeroTierCore.h b/include/ZeroTierCore.h index 39d18cd38..30f3b3818 100644 --- a/include/ZeroTierCore.h +++ b/include/ZeroTierCore.h @@ -25,7 +25,6 @@ #else #include #include -#include #include #include #include diff --git a/node/AES.cpp b/node/AES.cpp index d583f37cd..83661731b 100644 --- a/node/AES.cpp +++ b/node/AES.cpp @@ -468,7 +468,11 @@ void AES::GMAC::finish(uint8_t tag[16]) noexcept // AES-CTR ------------------------------------------------------------------------------------------------------------ -static __attribute__((__target__("sse4,avx,avx2,vaes,avx512f"))) void p_aesCtrInnerVAES512(unsigned int &len,uint64_t &c0,uint64_t &c1,const uint8_t *&in,uint8_t *&out,const __m128i *const k) noexcept +#ifdef ZT_AES_AESNI + +static +__attribute__((__target__("sse4,avx,avx2,vaes,avx512f"))) +void p_aesCtrInnerVAES512(unsigned int &len,uint64_t &c0,uint64_t &c1,const uint8_t *&in,uint8_t *&out,const __m128i *const k) noexcept { const __m512i kk0 = _mm512_broadcast_i32x4(k[0]); const __m512i kk1 = _mm512_broadcast_i32x4(k[1]); @@ -515,7 +519,9 @@ static __attribute__((__target__("sse4,avx,avx2,vaes,avx512f"))) void p_aesCtrIn } while (len >= 64); } -static __attribute__((__target__("sse4,avx,avx2,vaes"))) void p_aesCtrInnerVAES256(unsigned int &len,uint64_t &c0,uint64_t &c1,const uint8_t *&in,uint8_t *&out,const __m128i *const k) noexcept +static +__attribute__((__target__("sse4,avx,avx2,vaes"))) +void p_aesCtrInnerVAES256(unsigned int &len,uint64_t &c0,uint64_t &c1,const uint8_t *&in,uint8_t *&out,const __m128i *const k) noexcept { const __m256i kk0 = _mm256_broadcastsi128_si256(k[0]); const __m256i kk1 = _mm256_broadcastsi128_si256(k[1]); @@ -681,6 +687,8 @@ static void p_aesCtrInner128(unsigned int &len,uint64_t &c0,uint64_t &c1,const u } while (len >= 64); } +#endif + void AES::CTR::crypt(const void *const input,unsigned int len) noexcept { const uint8_t *in = reinterpret_cast(input); @@ -733,7 +741,7 @@ void AES::CTR::crypt(const void *const input,unsigned int len) noexcept _len = totalLen + len; if (likely(len >= 64)) { - if (Utils::CPUID.vaes) { // is only true if AVX is also present + if (Utils::CPUID.vaes) { if ((!Utils::CPUID.avx512f)||((len < 1024))) { p_aesCtrInnerVAES256(len,c0,c1,in,out,k); } else { diff --git a/node/Utils.cpp b/node/Utils.cpp index ace2e05f0..7f86cfd58 100644 --- a/node/Utils.cpp +++ b/node/Utils.cpp @@ -36,6 +36,7 @@ namespace Utils { CPUIDRegisters::CPUIDRegisters() noexcept { uint32_t eax,ebx,ecx,edx; + #ifdef __WINDOWS__ int regs[4]; __cpuid(regs,1); @@ -50,11 +51,17 @@ CPUIDRegisters::CPUIDRegisters() noexcept : "a"(1),"c"(0) ); #endif + rdrand = ((ecx & (1U << 30U)) != 0); aes = ( ((ecx & (1U << 25U)) != 0) && ((ecx & (1U << 19U)) != 0) && ((ecx & (1U << 1U)) != 0) ); avx = ((ecx & (1U << 25U)) != 0); + #ifdef __WINDOWS__ -TODO + __cpuid(regs,7); + eax = (uint32_t)regs[0]; + ebx = (uint32_t)regs[1]; + ecx = (uint32_t)regs[2]; + edx = (uint32_t)regs[3]; #else __asm__ __volatile__ ( "cpuid" @@ -62,6 +69,7 @@ TODO : "a"(7),"c"(0) ); #endif + vaes = aes && avx && ((ecx & (1U << 9U)) != 0); vpclmulqdq = aes && avx && ((ecx & (1U << 10U)) != 0); avx2 = avx && ((ebx & (1U << 5U)) != 0); diff --git a/node/VL1.cpp b/node/VL1.cpp index bf5d4d8cc..b4889ab43 100644 --- a/node/VL1.cpp +++ b/node/VL1.cpp @@ -16,12 +16,13 @@ #include "Node.hpp" #include "Topology.hpp" #include "VL2.hpp" +#include "AES.hpp" #include "Salsa20.hpp" #include "LZ4.hpp" #include "Poly1305.hpp" +#include "SHA512.hpp" #include "Identity.hpp" #include "SelfAwareness.hpp" -#include "SHA512.hpp" #include "Peer.hpp" #include "Path.hpp" #include "Expect.hpp" @@ -40,6 +41,7 @@ struct p_SalsaPolyCopyFunction Salsa20 s20; Poly1305 poly1305; unsigned int hdrRemaining; + ZT_INLINE p_SalsaPolyCopyFunction(const void *salsaKey, const void *salsaIv) : s20(salsaKey, salsaIv), poly1305(), @@ -69,6 +71,7 @@ struct p_PolyCopyFunction { Poly1305 poly1305; unsigned int hdrRemaining; + ZT_INLINE p_PolyCopyFunction(const void *salsaKey, const void *salsaIv) : poly1305(), hdrRemaining(ZT_PROTO_PACKET_ENCRYPTED_SECTION_START) @@ -100,7 +103,7 @@ VL1::VL1(const RuntimeEnvironment *renv) : { } -void VL1::onRemotePacket(void *const tPtr, const int64_t localSocket, const InetAddress &fromAddr, SharedPtr &data, const unsigned int len) +void VL1::onRemotePacket(void *const tPtr, const int64_t localSocket, const InetAddress &fromAddr, SharedPtr &data, const unsigned int len) noexcept { const SharedPtr path(RR->topology->path(localSocket, fromAddr)); const int64_t now = RR->node->now(); @@ -112,18 +115,6 @@ void VL1::onRemotePacket(void *const tPtr, const int64_t localSocket, const Inet // for valid data packets. This may allow the compiler to generate very slightly // faster code for that path. - /* - * Packet format: - * <[8] 64-bit packet ID / crypto IV> - * <[5] destination ZT address> - * <[5] source ZT address> - * <[1] outer visible flags, cipher, and hop count (bits: FFCCHHH)> - * <[8] 64-bit MAC (or trusted path ID in trusted path mode)> - * [... -- begin encryption envelope -- ...] - * <[1] inner envelope flags (MS 3 bits) and verb (LS 5 bits)> - * [... verb-specific payload ...] - */ - try { if (unlikely(len < ZT_PROTO_MIN_FRAGMENT_LENGTH)) return; @@ -132,7 +123,7 @@ void VL1::onRemotePacket(void *const tPtr, const int64_t localSocket, const Inet const uint64_t packetId = Utils::loadAsIsEndian(data->unsafeData + ZT_PROTO_PACKET_ID_INDEX); static_assert((ZT_PROTO_PACKET_DESTINATION_INDEX + ZT_ADDRESS_LENGTH) < ZT_PROTO_MIN_FRAGMENT_LENGTH, "overflow"); - Address destination(data->unsafeData + ZT_PROTO_PACKET_DESTINATION_INDEX); + const Address destination(data->unsafeData + ZT_PROTO_PACKET_DESTINATION_INDEX); if (destination != RR->identity.address()) { m_relay(tPtr, path, destination, data, len); return; @@ -419,7 +410,7 @@ void VL1::onRemotePacket(void *const tPtr, const int64_t localSocket, const Inet // This is rate limited by virtue of the retry rate limit timer. if (pktSize <= 0) pktSize = pktv.mergeCopy(*pkt); - if (pktSize >= ZT_PROTO_MIN_PACKET_LENGTH) { + if (likely(pktSize >= ZT_PROTO_MIN_PACKET_LENGTH)) { ZT_SPEW("authentication failed or no peers match, queueing WHOIS for %s", source.toString().c_str()); bool sendPending; { @@ -464,23 +455,22 @@ void VL1::m_sendPendingWhois(void *tPtr, int64_t now) } } - if (toSend.empty()) - return; - - const SharedPtr key(root->key()); - uint8_t outp[ZT_DEFAULT_UDP_MTU - ZT_PROTO_MIN_PACKET_LENGTH]; - Vector
::iterator a(toSend.begin()); - while (a != toSend.end()) { - const uint64_t packetId = key->nextMessage(RR->identity.address(), root->address()); - int p = Protocol::newPacket(outp, packetId, root->address(), RR->identity.address(), Protocol::VERB_WHOIS); - while ((a != toSend.end()) && (p < (sizeof(outp) - ZT_ADDRESS_LENGTH))) { - a->copyTo(outp + p); - ++a; - p += ZT_ADDRESS_LENGTH; + if (!toSend.empty()) { + const SharedPtr key(root->key()); + uint8_t outp[ZT_DEFAULT_UDP_MTU - ZT_PROTO_MIN_PACKET_LENGTH]; + Vector
::iterator a(toSend.begin()); + while (a != toSend.end()) { + const uint64_t packetId = key->nextMessage(RR->identity.address(), root->address()); + int p = Protocol::newPacket(outp, packetId, root->address(), RR->identity.address(), Protocol::VERB_WHOIS); + while ((a != toSend.end()) && (p < (sizeof(outp) - ZT_ADDRESS_LENGTH))) { + a->copyTo(outp + p); + ++a; + p += ZT_ADDRESS_LENGTH; + } + Protocol::armor(outp, p, key, root->cipher()); + RR->expect->sending(packetId, now); + root->send(tPtr, now, outp, p, rootPath); } - Protocol::armor(outp, p, key, root->cipher()); - RR->expect->sending(packetId, now); - root->send(tPtr, now, outp, p, rootPath); } } @@ -708,7 +698,7 @@ bool VL1::m_OK(void *tPtr, const uint64_t packetId, const unsigned int auth, con } const int64_t now = RR->node->now(); - if (!RR->expect->expecting(inRePacketId, now)) { + if (unlikely(!RR->expect->expecting(inRePacketId, now))) { RR->t->incomingPacketDropped(tPtr, 0x4c1f1ff8, packetId, 0, identityFromPeerPtr(peer), path->address(), 0, Protocol::VERB_OK, ZT_TRACE_PACKET_DROP_REASON_REPLY_NOT_EXPECTED); return false; } diff --git a/node/VL1.hpp b/node/VL1.hpp index 6242e8000..e5fc41a71 100644 --- a/node/VL1.hpp +++ b/node/VL1.hpp @@ -60,7 +60,7 @@ public: * @param data Packet data * @param len Packet length */ - void onRemotePacket(void *tPtr,int64_t localSocket,const InetAddress &fromAddr,SharedPtr &data,unsigned int len); + void onRemotePacket(void *tPtr,int64_t localSocket,const InetAddress &fromAddr,SharedPtr &data,unsigned int len) noexcept; private: const RuntimeEnvironment *RR;