diff --git a/make-mac.mk b/make-mac.mk index 279af3071..d476e9b0d 100644 --- a/make-mac.mk +++ b/make-mac.mk @@ -3,7 +3,8 @@ CXX=clang++ INCLUDES= DEFS= LIBS= -ARCH_FLAGS=-msse -msse2 -mssse3 -msse4 -msse4.1 -maes -mpclmul +ARCH_FLAGS=-msse -msse2 -arch x86_64 -arch arm64e + CODESIGN=echo PRODUCTSIGN=echo CODESIGN_APP_CERT= @@ -91,7 +92,7 @@ CXXFLAGS=$(CFLAGS) -std=c++11 -stdlib=libc++ all: one macui ext/x64-salsa2012-asm/salsa2012.o: - $(CC) $(CFLAGS) -c ext/x64-salsa2012-asm/salsa2012.s -o ext/x64-salsa2012-asm/salsa2012.o + as -o ext/x64-salsa2012-asm/salsa2012.o ext/x64-salsa2012-asm/salsa2012.s mac-agent: FORCE $(CC) -Ofast -o MacEthernetTapAgent osdep/MacEthernetTapAgent.c diff --git a/node/Constants.hpp b/node/Constants.hpp index 3445e2613..f9775dd63 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -669,11 +669,7 @@ /** * Desired buffer size for UDP sockets (used in service and osdep but defined here) */ -#if (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__)) #define ZT_UDP_DESIRED_BUF_SIZE 1048576 -#else -#define ZT_UDP_DESIRED_BUF_SIZE 131072 -#endif /** * Desired / recommended min stack size for threads (used on some platforms to reset thread stack size) diff --git a/node/Network.cpp b/node/Network.cpp index 893bd90ed..914c96bc6 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -40,7 +40,7 @@ namespace ZeroTier { namespace { // Returns true if packet appears valid; pos and proto will be set -static bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto) +static inline bool _ipv6GetPayload(const uint8_t *frameData,unsigned int frameLen,unsigned int &pos,unsigned int &proto) { if (frameLen < 40) return false; diff --git a/node/Packet.cpp b/node/Packet.cpp index 1dbb4211b..3bdeaf9e9 100644 --- a/node/Packet.cpp +++ b/node/Packet.cpp @@ -19,7 +19,7 @@ #include "Packet.hpp" -#ifdef ZT_USE_X64_ASM_SALSA2012 +#if defined(ZT_USE_X64_ASM_SALSA2012) && defined(ZT_ARCH_X64) #include "../ext/x64-salsa2012-asm/salsa2012.h" #endif #ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012 @@ -42,7 +42,7 @@ namespace ZeroTier { /* Set up macros for fast single-pass ASM Salsa20/12 crypto, if we have it */ // x64 SSE crypto -#ifdef ZT_USE_X64_ASM_SALSA2012 +#if defined(ZT_USE_X64_ASM_SALSA2012) && defined(ZT_ARCH_X64) #define ZT_HAS_FAST_CRYPTO() (true) #define ZT_FAST_SINGLE_PASS_SALSA2012(b,l,n,k) zt_salsa2012_amd64_xmm6(reinterpret_cast(b),(l),reinterpret_cast(n),reinterpret_cast(k)) #endif @@ -880,7 +880,7 @@ void Packet::armor(const void *key,bool encryptPayload,const AES aesKeys[2]) { uint8_t *const data = reinterpret_cast(unsafeData()); if ((aesKeys) && (encryptPayload)) { - char tmp0[16],tmp1[16]; + //char tmp0[16],tmp1[16]; setCipher(ZT_PROTO_CIPHER_SUITE__AES_GMAC_SIV); uint8_t *const payload = data + ZT_PACKET_IDX_VERB; diff --git a/node/Utils.cpp b/node/Utils.cpp index 4d32c9b3a..345326984 100644 --- a/node/Utils.cpp +++ b/node/Utils.cpp @@ -96,13 +96,16 @@ const char Utils::HEXCHARS[16] = { '0','1','2','3','4','5','6','7','8','9','a',' #ifdef ZT_ARCH_ARM_HAS_NEON Utils::ARMCapabilities::ARMCapabilities() noexcept { -#if TARGET_OS_IPHONE +#ifdef __APPLE__ + this->aes = true; this->crc32 = true; this->pmull = true; this->sha1 = true; this->sha2 = true; + #else + #ifdef HWCAP2_AES if (sizeof(void *) == 4) { const long hwcaps2 = getauxval(AT_HWCAP2); @@ -122,7 +125,8 @@ Utils::ARMCapabilities::ARMCapabilities() noexcept #ifdef HWCAP2_AES } #endif -#endif // TARGET_OS_IPHONE + +#endif // __APPLE__ } const Utils::ARMCapabilities Utils::ARMCAP; diff --git a/osdep/BlockingQueue.hpp b/osdep/BlockingQueue.hpp index c99eba503..cce37a04a 100644 --- a/osdep/BlockingQueue.hpp +++ b/osdep/BlockingQueue.hpp @@ -18,8 +18,8 @@ #include #include #include - -#include "Thread.hpp" +#include +#include namespace ZeroTier { @@ -67,7 +67,8 @@ public: inline bool get(T &value) { std::unique_lock lock(m); - if (!r) return false; + if (!r) + return false; while (q.empty()) { c.wait(lock); if (!r) { @@ -81,6 +82,16 @@ public: return true; } + inline std::vector drain() + { + std::vector v; + while (!q.empty()) { + v.push_back(q.front()); + q.pop(); + } + return v; + } + enum TimedWaitResult { OK, @@ -92,7 +103,8 @@ public: { const std::chrono::milliseconds ms2{ms}; std::unique_lock lock(m); - if (!r) return STOP; + if (!r) + return STOP; while (q.empty()) { if (c.wait_for(lock,ms2) == std::cv_status::timeout) return ((r) ? TIMED_OUT : STOP); @@ -105,10 +117,10 @@ public: } private: - volatile bool r; std::queue q; mutable std::mutex m; mutable std::condition_variable c,gc; + std::atomic_bool r; }; } // namespace ZeroTier diff --git a/osdep/LinuxEthernetTap.cpp b/osdep/LinuxEthernetTap.cpp index 4abc54b92..9306dc34c 100644 --- a/osdep/LinuxEthernetTap.cpp +++ b/osdep/LinuxEthernetTap.cpp @@ -11,6 +11,10 @@ */ /****/ +#ifdef __GNUC__ +#pragma GCC diagnostic ignored "-Wrestrict" +#endif + #include "../node/Constants.hpp" #ifdef __LINUX__ @@ -52,13 +56,13 @@ #define IFNAMSIZ 16 #endif +#define ZT_TAP_BUF_SIZE 16384 + // ff:ff:ff:ff:ff:ff with no ADI static const ZeroTier::MulticastGroup _blindWildcardMulticastGroup(ZeroTier::MAC(0xff),0); namespace ZeroTier { -static Mutex __tapCreateLock; - static const char _base32_chars[32] = { 'a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z','2','3','4','5','6','7' }; static void _base32_5_to_8(const uint8_t *in,char *out) { @@ -90,16 +94,18 @@ LinuxEthernetTap::LinuxEthernetTap( _fd(0), _enabled(true) { + static std::mutex s_tapCreateLock; char procpath[128],nwids[32]; struct stat sbuf; - // ensure netlink connection is started + // Create only one tap at a time globally. + std::lock_guard tapCreateLock(s_tapCreateLock); + + // Make sure Linux netlink is initialized. (void)LinuxNetLink::getInstance(); OSUtils::ztsnprintf(nwids,sizeof(nwids),"%.16llx",nwid); - Mutex::Lock _l(__tapCreateLock); // create only one tap at a time, globally - _fd = ::open("/dev/net/tun",O_RDWR); if (_fd <= 0) { _fd = ::open("/dev/tun",O_RDWR); @@ -110,7 +116,8 @@ LinuxEthernetTap::LinuxEthernetTap( struct ifreq ifr; memset(&ifr,0,sizeof(ifr)); - // Restore device names from legacy devicemap, but for new devices we use a base32-based canonical naming + // Restore device names from legacy devicemap, but for new devices we use a base32-based + // canonical device name. std::map globalDeviceMap; FILE *devmapf = fopen((_homePath + ZT_PATH_SEPARATOR_S + "devicemap").c_str(),"r"); if (devmapf) { @@ -173,22 +180,154 @@ LinuxEthernetTap::LinuxEthernetTap( ::ioctl(_fd,TUNSETPERSIST,0); // valgrind may generate a false alarm here _dev = ifr.ifr_name; - - // Set close-on-exec so that devices cannot persist if we fork/exec for update ::fcntl(_fd,F_SETFD,fcntl(_fd,F_GETFD) | FD_CLOEXEC); (void)::pipe(_shutdownSignalPipe); - _thread = Thread::start(this); + _tapReaderThread = std::thread([this]{ + fd_set readfds,nullfds; + int n,nfds,r; + void *buf = nullptr; + + { + struct ifreq ifr; + memset(&ifr,0,sizeof(ifr)); + strcpy(ifr.ifr_name,_dev.c_str()); + + const int sock = socket(AF_INET,SOCK_DGRAM,0); + if (sock <= 0) + return; + + if (ioctl(sock,SIOCGIFFLAGS,(void *)&ifr) < 0) { + ::close(sock); + printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n"); + return; + } + ifr.ifr_flags |= IFF_UP; + if (ioctl(sock,SIOCSIFFLAGS,(void *)&ifr) < 0) { + ::close(sock); + printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n"); + return; + } + + // Some kernel versions seem to require you to yield while the device comes up + // before they will accept MTU and MAC. For others it doesn't matter, but is + // harmless. This was moved to the worker thread though so as not to block the + // main ZeroTier loop. + usleep(500000); + + ifr.ifr_ifru.ifru_hwaddr.sa_family = ARPHRD_ETHER; + _mac.copyTo(ifr.ifr_ifru.ifru_hwaddr.sa_data,6); + if (ioctl(sock,SIOCSIFHWADDR,(void *)&ifr) < 0) { + ::close(sock); + printf("WARNING: ioctl() failed setting up Linux tap device (set MAC)\n"); + return; + } + + ifr.ifr_ifru.ifru_mtu = (int)_mtu; + if (ioctl(sock,SIOCSIFMTU,(void *)&ifr) < 0) { + ::close(sock); + printf("WARNING: ioctl() failed setting up Linux tap device (set MTU)\n"); + return; + } + + fcntl(_fd,F_SETFL,O_NONBLOCK); + + ::close(sock); + } + + FD_ZERO(&readfds); + FD_ZERO(&nullfds); + nfds = (int)std::max(_shutdownSignalPipe[0],_fd) + 1; + + r = 0; + for(;;) { + FD_SET(_shutdownSignalPipe[0],&readfds); + FD_SET(_fd,&readfds); + select(nfds,&readfds,&nullfds,&nullfds,(struct timeval *)0); + + if (FD_ISSET(_shutdownSignalPipe[0],&readfds)) // writes to shutdown pipe terminate thread + break; + + if (FD_ISSET(_fd,&readfds)) { + for(;;) { // read until there are no more packets, then return to outer select() loop + if (!buf) { + std::lock_guard l(_buffers_l); + if (_buffers.empty()) { + buf = malloc(ZT_TAP_BUF_SIZE); + if (!buf) + break; + } else { + buf = _buffers.back(); + _buffers.pop_back(); + } + } + + n = (int)::read(_fd,reinterpret_cast(buf) + r,ZT_TAP_BUF_SIZE - r); + + if (n > 0) { + // Some tap drivers like to send the ethernet frame and the + // payload in two chunks, so handle that by accumulating + // data until we have at least a frame. + r += n; + if (r > 14) { + if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms + r = _mtu + 14; + + if (_enabled) { + _tapq.post(std::pair(buf,r)); + buf = nullptr; + } + + r = 0; + } + } else { + r = 0; + break; + } + } + } + } + }); + + _tapProcessorThread = std::thread([this] { + MAC to,from; + std::pair qi; + while (_tapq.get(qi)) { + uint8_t *const b = reinterpret_cast(qi.first); + if (b) { + to.setTo(b, 6); + from.setTo(b + 6, 6); + unsigned int etherType = Utils::ntoh(((const uint16_t *)b)[6]); + _handler(_arg, nullptr, _nwid, from, to, etherType, 0, (const void *)(b + 14),(unsigned int)(qi.second - 14)); + { + std::lock_guard l(_buffers_l); + _buffers.push_back(qi.first); + } + } else break; + } + }); } LinuxEthernetTap::~LinuxEthernetTap() { - (void)::write(_shutdownSignalPipe[1],"\0",1); // causes thread to exit - Thread::join(_thread); + (void)::write(_shutdownSignalPipe[1],"\0",1); // causes reader thread to exit + _tapq.post(std::pair(nullptr,0)); // causes processor thread to exit + ::close(_fd); ::close(_shutdownSignalPipe[0]); ::close(_shutdownSignalPipe[1]); + + _tapReaderThread.join(); + _tapProcessorThread.join(); + + for(std::vector::iterator i(_buffers.begin());i!=_buffers.end();++i) + free(*i); + std::vector< std::pair > dv(_tapq.drain()); + for(std::vector< std::pair >::iterator i(dv.begin());i!=dv.end();++i) { + if (i->first) + free(i->first); + } } void LinuxEthernetTap::setEnabled(bool en) @@ -402,106 +541,6 @@ void LinuxEthernetTap::setMtu(unsigned int mtu) } } -void LinuxEthernetTap::threadMain() - throw() -{ - fd_set readfds,nullfds; - MAC to,from; - int n,nfds,r; - char getBuf[ZT_MAX_MTU + 64]; - - Thread::sleep(100); - - { - struct ifreq ifr; - memset(&ifr,0,sizeof(ifr)); - - strcpy(ifr.ifr_name,_dev.c_str()); - - const int sock = socket(AF_INET,SOCK_DGRAM,0); - if (sock <= 0) - return; - - if (ioctl(sock,SIOCGIFFLAGS,(void *)&ifr) < 0) { - ::close(sock); - printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n"); - return; - } - ifr.ifr_flags |= IFF_UP; - if (ioctl(sock,SIOCSIFFLAGS,(void *)&ifr) < 0) { - ::close(sock); - printf("WARNING: ioctl() failed setting up Linux tap device (bring interface up)\n"); - return; - } - - Thread::sleep(500); - - ifr.ifr_ifru.ifru_hwaddr.sa_family = ARPHRD_ETHER; - _mac.copyTo(ifr.ifr_ifru.ifru_hwaddr.sa_data,6); - if (ioctl(sock,SIOCSIFHWADDR,(void *)&ifr) < 0) { - ::close(sock); - printf("WARNING: ioctl() failed setting up Linux tap device (set MAC)\n"); - return; - } - - ifr.ifr_ifru.ifru_mtu = (int)_mtu; - if (ioctl(sock,SIOCSIFMTU,(void *)&ifr) < 0) { - ::close(sock); - printf("WARNING: ioctl() failed setting up Linux tap device (set MTU)\n"); - return; - } - - if (fcntl(_fd,F_SETFL,fcntl(_fd,F_GETFL) & ~O_NONBLOCK) == -1) { - ::close(sock); - printf("WARNING: ioctl() failed setting up Linux tap device (set non-blocking)\n"); - return; - } - - ::close(sock); - } - - FD_ZERO(&readfds); - FD_ZERO(&nullfds); - nfds = (int)std::max(_shutdownSignalPipe[0],_fd) + 1; - - r = 0; - for(;;) { - FD_SET(_shutdownSignalPipe[0],&readfds); - FD_SET(_fd,&readfds); - select(nfds,&readfds,&nullfds,&nullfds,(struct timeval *)0); - - if (FD_ISSET(_shutdownSignalPipe[0],&readfds)) // writes to shutdown pipe terminate thread - break; - - if (FD_ISSET(_fd,&readfds)) { - n = (int)::read(_fd,getBuf + r,sizeof(getBuf) - r); - if (n < 0) { - if ((errno != EINTR)&&(errno != ETIMEDOUT)) - break; - } else { - // Some tap drivers like to send the ethernet frame and the - // payload in two chunks, so handle that by accumulating - // data until we have at least a frame. - r += n; - if (r > 14) { - if (r > ((int)_mtu + 14)) // sanity check for weird TAP behavior on some platforms - r = _mtu + 14; - - if (_enabled) { - to.setTo(getBuf,6); - from.setTo(getBuf + 6,6); - unsigned int etherType = ntohs(((const uint16_t *)getBuf)[6]); - // TODO: VLAN support - _handler(_arg,(void *)0,_nwid,from,to,etherType,0,(const void *)(getBuf + 14),r - 14); - } - - r = 0; - } - } - } - } -} - } // namespace ZeroTier #endif // __LINUX__ diff --git a/osdep/LinuxEthernetTap.hpp b/osdep/LinuxEthernetTap.hpp index 68fdf2461..9e9206ead 100644 --- a/osdep/LinuxEthernetTap.hpp +++ b/osdep/LinuxEthernetTap.hpp @@ -21,10 +21,12 @@ #include #include #include - +#include +#include +#include #include "../node/MulticastGroup.hpp" -#include "Thread.hpp" #include "EthernetTap.hpp" +#include "BlockingQueue.hpp" namespace ZeroTier { @@ -56,15 +58,11 @@ public: virtual void setMtu(unsigned int mtu); virtual void setDns(const char *domain, const std::vector &servers) {} - void threadMain() - throw(); - private: void (*_handler)(void *,void *,uint64_t,const MAC &,const MAC &,unsigned int,unsigned int,const void *,unsigned int); void *_arg; uint64_t _nwid; MAC _mac; - Thread _thread; std::string _homePath; std::string _dev; std::vector _multicastGroups; @@ -72,6 +70,11 @@ private: int _fd; int _shutdownSignalPipe[2]; std::atomic_bool _enabled; + std::thread _tapReaderThread; + std::thread _tapProcessorThread; + std::mutex _buffers_l; + std::vector _buffers; + BlockingQueue< std::pair > _tapq; }; } // namespace ZeroTier diff --git a/osdep/Phy.hpp b/osdep/Phy.hpp index 8eb184ba1..8c38d2fae 100644 --- a/osdep/Phy.hpp +++ b/osdep/Phy.hpp @@ -351,14 +351,14 @@ public: int tmpbs = bs; if (setsockopt(s,SOL_SOCKET,SO_RCVBUF,(const char *)&tmpbs,sizeof(tmpbs)) == 0) break; - bs -= 16384; + bs -= 4096; } bs = bufferSize; while (bs >= 65536) { int tmpbs = bs; if (setsockopt(s,SOL_SOCKET,SO_SNDBUF,(const char *)&tmpbs,sizeof(tmpbs)) == 0) break; - bs -= 16384; + bs -= 4096; } } diff --git a/selftest.cpp b/selftest.cpp index d11f14ca8..357e9a026 100644 --- a/selftest.cpp +++ b/selftest.cpp @@ -47,7 +47,7 @@ #include "osdep/PortMapper.hpp" #include "osdep/Thread.hpp" -#ifdef ZT_USE_X64_ASM_SALSA2012 +#if defined(ZT_USE_X64_ASM_SALSA2012) && defined(ZT_ARCH_X64) #include "ext/x64-salsa2012-asm/salsa2012.h" #endif #ifdef ZT_USE_ARM32_NEON_ASM_SALSA2012 @@ -203,7 +203,7 @@ static int testCrypto() ::free((void *)bb); } -#ifdef ZT_USE_X64_ASM_SALSA2012 +#if defined(ZT_USE_X64_ASM_SALSA2012) && defined(ZT_ARCH_X64) std::cout << "[crypto] Benchmarking Salsa20/12 fast x64 ASM... "; std::cout.flush(); { unsigned char *bb = (unsigned char *)::malloc(1234567);