From 060870462606f82d3a9b0fe6aca87f8a5de8705e Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Fri, 15 Dec 2017 11:03:20 -0800 Subject: [PATCH] Some micro-optimizations, including a memcpy that is faster than Linux for most distro/compiler versions. --- controller/EmbeddedNetworkController.cpp | 8 ++--- make-linux.mk | 6 ++-- node/Buffer.hpp | 8 ++--- node/Capability.hpp | 10 +++--- node/CertificateOfMembership.hpp | 6 ++-- node/CertificateOfOwnership.hpp | 8 ++--- node/Dictionary.hpp | 2 +- node/Identity.hpp | 4 +-- node/InetAddress.cpp | 16 ++++----- node/InetAddress.hpp | 34 +++++++++--------- node/Network.cpp | 6 ++-- node/NetworkConfig.hpp | 4 +-- node/Node.cpp | 4 +-- node/OutboundMulticast.cpp | 2 +- node/Packet.cpp | 28 +++++++-------- node/Path.hpp | 4 +-- node/Revocation.hpp | 2 +- node/Tag.hpp | 2 +- node/Utils.hpp | 38 ++++++++++++++++++++ node/World.hpp | 4 +-- selftest.cpp | 44 ++++++++++++++++++++++++ service/OneService.cpp | 4 +-- 22 files changed, 163 insertions(+), 81 deletions(-) diff --git a/controller/EmbeddedNetworkController.cpp b/controller/EmbeddedNetworkController.cpp index d8ac1878f..2032f097a 100644 --- a/controller/EmbeddedNetworkController.cpp +++ b/controller/EmbeddedNetworkController.cpp @@ -333,14 +333,14 @@ static bool _parseRule(json &r,ZT_VirtualNetworkRule &rule) } else if (t == "MATCH_IPV6_SOURCE") { rule.t |= ZT_NETWORK_RULE_MATCH_IPV6_SOURCE; InetAddress ip(OSUtils::jsonString(r["ip"],"::0").c_str()); - memcpy(rule.v.ipv6.ip,reinterpret_cast(&ip)->sin6_addr.s6_addr,16); + ZT_FAST_MEMCPY(rule.v.ipv6.ip,reinterpret_cast(&ip)->sin6_addr.s6_addr,16); rule.v.ipv6.mask = Utils::ntoh(reinterpret_cast(&ip)->sin6_port) & 0xff; if (rule.v.ipv6.mask > 128) rule.v.ipv6.mask = 128; return true; } else if (t == "MATCH_IPV6_DEST") { rule.t |= ZT_NETWORK_RULE_MATCH_IPV6_DEST; InetAddress ip(OSUtils::jsonString(r["ip"],"::0").c_str()); - memcpy(rule.v.ipv6.ip,reinterpret_cast(&ip)->sin6_addr.s6_addr,16); + ZT_FAST_MEMCPY(rule.v.ipv6.ip,reinterpret_cast(&ip)->sin6_addr.s6_addr,16); rule.v.ipv6.mask = Utils::ntoh(reinterpret_cast(&ip)->sin6_port) & 0xff; if (rule.v.ipv6.mask > 128) rule.v.ipv6.mask = 128; return true; @@ -1520,8 +1520,8 @@ void EmbeddedNetworkController::_request( InetAddress ipRangeEnd(OSUtils::jsonString(pool["ipRangeEnd"],"").c_str()); if ( (ipRangeStart.ss_family == AF_INET6) && (ipRangeEnd.ss_family == AF_INET6) ) { uint64_t s[2],e[2],x[2],xx[2]; - memcpy(s,ipRangeStart.rawIpData(),16); - memcpy(e,ipRangeEnd.rawIpData(),16); + ZT_FAST_MEMCPY(s,ipRangeStart.rawIpData(),16); + ZT_FAST_MEMCPY(e,ipRangeEnd.rawIpData(),16); s[0] = Utils::ntoh(s[0]); s[1] = Utils::ntoh(s[1]); e[0] = Utils::ntoh(e[0]); diff --git a/make-linux.mk b/make-linux.mk index 5b6232473..992689902 100644 --- a/make-linux.mk +++ b/make-linux.mk @@ -66,9 +66,9 @@ ifeq ($(ZT_DEBUG),1) node/Salsa20.o node/SHA512.o node/C25519.o node/Poly1305.o: CXXFLAGS=-Wall -O2 -g -pthread $(INCLUDES) $(DEFS) else override DEFS+=-D_FORTIFY_SOURCE=2 - CFLAGS?=-Os -fstack-protector + CFLAGS?=-O3 -fstack-protector override CFLAGS+=-Wall -Wno-deprecated -fPIE -pthread $(INCLUDES) -DNDEBUG $(DEFS) - CXXFLAGS?=-Os -fstack-protector + CXXFLAGS?=-O3 -fstack-protector override CXXFLAGS+=-Wall -Wno-deprecated -Wno-unused-result -Wreorder -fPIE -std=c++11 -pthread $(INCLUDES) -DNDEBUG $(DEFS) override LDFLAGS+=-pie -Wl,-z,relro,-z,now STRIP?=strip @@ -241,7 +241,7 @@ zerotier-idtool: one zerotier-cli: one libzerotiercore.a: FORCE - make CFLAGS="-Os -fstack-protector -fPIC" CXXFLAGS="-Os -std=c++11 -fstack-protector -fPIC" $(CORE_OBJS) + make CFLAGS="-O3 -fstack-protector -fPIC" CXXFLAGS="-O3 -std=c++11 -fstack-protector -fPIC" $(CORE_OBJS) ar rcs libzerotiercore.a $(CORE_OBJS) ranlib libzerotiercore.a diff --git a/node/Buffer.hpp b/node/Buffer.hpp index 7b91e72fb..f84b3774c 100644 --- a/node/Buffer.hpp +++ b/node/Buffer.hpp @@ -116,9 +116,9 @@ public: if (unlikely(b._l > C)) throw ZT_EXCEPTION_OUT_OF_BOUNDS; if (C2 == C) { - memcpy(this,&b,sizeof(Buffer)); + ZT_FAST_MEMCPY(this,&b,sizeof(Buffer)); } else { - memcpy(_b,b._b,_l = b._l); + ZT_FAST_MEMCPY(_b,b._b,_l = b._l); } return *this; } @@ -127,7 +127,7 @@ public: { if (unlikely(l > C)) throw ZT_EXCEPTION_OUT_OF_BOUNDS; - memcpy(_b,b,l); + ZT_FAST_MEMCPY(_b,b,l); _l = l; } @@ -281,7 +281,7 @@ public: { if (unlikely((_l + l) > C)) throw ZT_EXCEPTION_OUT_OF_BOUNDS; - memcpy(_b + _l,b,l); + ZT_FAST_MEMCPY(_b + _l,b,l); _l += l; } diff --git a/node/Capability.hpp b/node/Capability.hpp index 407884ad9..bec1f47f5 100644 --- a/node/Capability.hpp +++ b/node/Capability.hpp @@ -94,7 +94,7 @@ public: _maxCustodyChainLength = (mccl > 0) ? ((mccl < ZT_MAX_CAPABILITY_CUSTODY_CHAIN_LENGTH) ? mccl : (unsigned int)ZT_MAX_CAPABILITY_CUSTODY_CHAIN_LENGTH) : 1; _ruleCount = (ruleCount < ZT_MAX_CAPABILITY_RULES) ? ruleCount : ZT_MAX_CAPABILITY_RULES; if (_ruleCount) - memcpy(_rules,rules,sizeof(ZT_VirtualNetworkRule) * _ruleCount); + ZT_FAST_MEMCPY(_rules,rules,sizeof(ZT_VirtualNetworkRule) * _ruleCount); } /** @@ -320,16 +320,16 @@ public: break; case ZT_NETWORK_RULE_MATCH_MAC_SOURCE: case ZT_NETWORK_RULE_MATCH_MAC_DEST: - memcpy(rules[ruleCount].v.mac,b.field(p,6),6); + ZT_FAST_MEMCPY(rules[ruleCount].v.mac,b.field(p,6),6); break; case ZT_NETWORK_RULE_MATCH_IPV4_SOURCE: case ZT_NETWORK_RULE_MATCH_IPV4_DEST: - memcpy(&(rules[ruleCount].v.ipv4.ip),b.field(p,4),4); + ZT_FAST_MEMCPY(&(rules[ruleCount].v.ipv4.ip),b.field(p,4),4); rules[ruleCount].v.ipv4.mask = (uint8_t)b[p + 4]; break; case ZT_NETWORK_RULE_MATCH_IPV6_SOURCE: case ZT_NETWORK_RULE_MATCH_IPV6_DEST: - memcpy(rules[ruleCount].v.ipv6.ip,b.field(p,16),16); + ZT_FAST_MEMCPY(rules[ruleCount].v.ipv6.ip,b.field(p,16),16); rules[ruleCount].v.ipv6.mask = (uint8_t)b[p + 16]; break; case ZT_NETWORK_RULE_MATCH_IP_TOS: @@ -452,7 +452,7 @@ public: if (b.template at(p) != ZT_C25519_SIGNATURE_LEN) throw ZT_EXCEPTION_INVALID_SERIALIZED_DATA_INVALID_CRYPTOGRAPHIC_TOKEN; p += 2; - memcpy(_custody[i].signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; + ZT_FAST_MEMCPY(_custody[i].signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; } else { p += 2 + b.template at(p); } diff --git a/node/CertificateOfMembership.hpp b/node/CertificateOfMembership.hpp index 0105fade2..2eff10839 100644 --- a/node/CertificateOfMembership.hpp +++ b/node/CertificateOfMembership.hpp @@ -119,7 +119,7 @@ public: CertificateOfMembership(const CertificateOfMembership &c) { - memcpy(this,&c,sizeof(CertificateOfMembership)); + ZT_FAST_MEMCPY(this,&c,sizeof(CertificateOfMembership)); } /** @@ -147,7 +147,7 @@ public: inline CertificateOfMembership &operator=(const CertificateOfMembership &c) { - memcpy(this,&c,sizeof(CertificateOfMembership)); + ZT_FAST_MEMCPY(this,&c,sizeof(CertificateOfMembership)); return *this; } @@ -329,7 +329,7 @@ public: p += ZT_ADDRESS_LENGTH; if (_signedBy) { - memcpy(_signature.data,b.field(p,(unsigned int)_signature.size()),_signature.size()); + ZT_FAST_MEMCPY(_signature.data,b.field(p,(unsigned int)_signature.size()),_signature.size()); p += (unsigned int)_signature.size(); } diff --git a/node/CertificateOfOwnership.hpp b/node/CertificateOfOwnership.hpp index 431bcc032..e397fd63b 100644 --- a/node/CertificateOfOwnership.hpp +++ b/node/CertificateOfOwnership.hpp @@ -115,11 +115,11 @@ public: if (_thingCount >= ZT_CERTIFICATEOFOWNERSHIP_MAX_THINGS) return; if (ip.ss_family == AF_INET) { _thingTypes[_thingCount] = THING_IPV4_ADDRESS; - memcpy(_thingValues[_thingCount],&(reinterpret_cast(&ip)->sin_addr.s_addr),4); + ZT_FAST_MEMCPY(_thingValues[_thingCount],&(reinterpret_cast(&ip)->sin_addr.s_addr),4); ++_thingCount; } else if (ip.ss_family == AF_INET6) { _thingTypes[_thingCount] = THING_IPV6_ADDRESS; - memcpy(_thingValues[_thingCount],reinterpret_cast(&ip)->sin6_addr.s6_addr,16); + ZT_FAST_MEMCPY(_thingValues[_thingCount],reinterpret_cast(&ip)->sin6_addr.s6_addr,16); ++_thingCount; } } @@ -198,7 +198,7 @@ public: for(unsigned int i=0,j=_thingCount;i(p) != ZT_C25519_SIGNATURE_LEN) throw ZT_EXCEPTION_INVALID_SERIALIZED_DATA_INVALID_CRYPTOGRAPHIC_TOKEN; p += 2; - memcpy(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; + ZT_FAST_MEMCPY(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; } else { p += 2 + b.template at(p); } diff --git a/node/Dictionary.hpp b/node/Dictionary.hpp index 061dcac1b..a1b0f9cbb 100644 --- a/node/Dictionary.hpp +++ b/node/Dictionary.hpp @@ -81,7 +81,7 @@ public: if (s) { if (len > (C-1)) len = C-1; - memcpy(_d,s,len); + ZT_FAST_MEMCPY(_d,s,len); _d[len] = (char)0; } else { _d[0] = (char)0; diff --git a/node/Identity.hpp b/node/Identity.hpp index 3d4d93857..ab2886ad1 100644 --- a/node/Identity.hpp +++ b/node/Identity.hpp @@ -249,7 +249,7 @@ public: if (b[p++] != 0) throw ZT_EXCEPTION_INVALID_SERIALIZED_DATA_INVALID_TYPE; - memcpy(_publicKey.data,b.field(p,(unsigned int)_publicKey.size()),(unsigned int)_publicKey.size()); + ZT_FAST_MEMCPY(_publicKey.data,b.field(p,(unsigned int)_publicKey.size()),(unsigned int)_publicKey.size()); p += (unsigned int)_publicKey.size(); unsigned int privateKeyLength = (unsigned int)b[p++]; @@ -257,7 +257,7 @@ public: if (privateKeyLength != ZT_C25519_PRIVATE_KEY_LEN) throw ZT_EXCEPTION_INVALID_SERIALIZED_DATA_INVALID_CRYPTOGRAPHIC_TOKEN; _privateKey = new C25519::Private(); - memcpy(_privateKey->data,b.field(p,ZT_C25519_PRIVATE_KEY_LEN),ZT_C25519_PRIVATE_KEY_LEN); + ZT_FAST_MEMCPY(_privateKey->data,b.field(p,ZT_C25519_PRIVATE_KEY_LEN),ZT_C25519_PRIVATE_KEY_LEN); p += ZT_C25519_PRIVATE_KEY_LEN; } diff --git a/node/InetAddress.cpp b/node/InetAddress.cpp index d3efc0890..ee32ce722 100644 --- a/node/InetAddress.cpp +++ b/node/InetAddress.cpp @@ -115,13 +115,13 @@ void InetAddress::set(const void *ipBytes,unsigned int ipLen,unsigned int port) memset(this,0,sizeof(InetAddress)); if (ipLen == 4) { uint32_t ipb[1]; - memcpy(ipb,ipBytes,4); + ZT_FAST_MEMCPY(ipb,ipBytes,4); ss_family = AF_INET; reinterpret_cast(this)->sin_addr.s_addr = ipb[0]; reinterpret_cast(this)->sin_port = Utils::hton((uint16_t)port); } else if (ipLen == 16) { ss_family = AF_INET6; - memcpy(reinterpret_cast(this)->sin6_addr.s6_addr,ipBytes,16); + ZT_FAST_MEMCPY(reinterpret_cast(this)->sin6_addr.s6_addr,ipBytes,16); reinterpret_cast(this)->sin6_port = Utils::hton((uint16_t)port); } } @@ -154,7 +154,7 @@ char *InetAddress::toIpString(char buf[64]) const case AF_INET6: { uint16_t a[8]; - memcpy(a,reinterpret_cast(this)->sin6_addr.s6_addr,16); + ZT_FAST_MEMCPY(a,reinterpret_cast(this)->sin6_addr.s6_addr,16); char *p = buf; for(int i=0;i<8;++i) { Utils::hex(Utils::ntoh(a[i]),p); @@ -199,7 +199,7 @@ bool InetAddress::fromString(const char *ipSlashPort) struct sockaddr_in6 *const in6 = reinterpret_cast(this); in6->sin6_family = AF_INET6; - memcpy(in6->sin6_addr.s6_addr,a,16); + ZT_FAST_MEMCPY(in6->sin6_addr.s6_addr,a,16); in6->sin6_port = Utils::hton((uint16_t)port); return true; @@ -212,7 +212,7 @@ bool InetAddress::fromString(const char *ipSlashPort) struct sockaddr_in *const in = reinterpret_cast(this); in->sin_family = AF_INET; - memcpy(&(in->sin_addr.s_addr),a,4); + ZT_FAST_MEMCPY(&(in->sin_addr.s_addr),a,4); in->sin_port = Utils::hton((uint16_t)port); return true; @@ -238,7 +238,7 @@ InetAddress InetAddress::netmask() const nm[0] = 0; nm[1] = 0; } - memcpy(reinterpret_cast(&r)->sin6_addr.s6_addr,nm,16); + ZT_FAST_MEMCPY(reinterpret_cast(&r)->sin6_addr.s6_addr,nm,16); } break; } return r; @@ -264,10 +264,10 @@ InetAddress InetAddress::network() const case AF_INET6: { uint64_t nm[2]; const unsigned int bits = netmaskBits(); - memcpy(nm,reinterpret_cast(&r)->sin6_addr.s6_addr,16); + ZT_FAST_MEMCPY(nm,reinterpret_cast(&r)->sin6_addr.s6_addr,16); nm[0] &= Utils::hton((uint64_t)((bits >= 64) ? 0xffffffffffffffffULL : (0xffffffffffffffffULL << (64 - bits)))); nm[1] &= Utils::hton((uint64_t)((bits <= 64) ? 0ULL : (0xffffffffffffffffULL << (128 - bits)))); - memcpy(reinterpret_cast(&r)->sin6_addr.s6_addr,nm,16); + ZT_FAST_MEMCPY(reinterpret_cast(&r)->sin6_addr.s6_addr,nm,16); } break; } return r; diff --git a/node/InetAddress.hpp b/node/InetAddress.hpp index 76810b936..fd430099a 100644 --- a/node/InetAddress.hpp +++ b/node/InetAddress.hpp @@ -91,8 +91,8 @@ struct InetAddress : public sockaddr_storage }; InetAddress() { memset(this,0,sizeof(InetAddress)); } - InetAddress(const InetAddress &a) { memcpy(this,&a,sizeof(InetAddress)); } - InetAddress(const InetAddress *a) { memcpy(this,a,sizeof(InetAddress)); } + InetAddress(const InetAddress &a) { ZT_FAST_MEMCPY(this,&a,sizeof(InetAddress)); } + InetAddress(const InetAddress *a) { ZT_FAST_MEMCPY(this,a,sizeof(InetAddress)); } InetAddress(const struct sockaddr_storage &ss) { *this = ss; } InetAddress(const struct sockaddr_storage *ss) { *this = ss; } InetAddress(const struct sockaddr &sa) { *this = sa; } @@ -108,28 +108,28 @@ struct InetAddress : public sockaddr_storage inline InetAddress &operator=(const InetAddress &a) { if (&a != this) - memcpy(this,&a,sizeof(InetAddress)); + ZT_FAST_MEMCPY(this,&a,sizeof(InetAddress)); return *this; } inline InetAddress &operator=(const InetAddress *a) { if (a != this) - memcpy(this,a,sizeof(InetAddress)); + ZT_FAST_MEMCPY(this,a,sizeof(InetAddress)); return *this; } inline InetAddress &operator=(const struct sockaddr_storage &ss) { if (reinterpret_cast(&ss) != this) - memcpy(this,&ss,sizeof(InetAddress)); + ZT_FAST_MEMCPY(this,&ss,sizeof(InetAddress)); return *this; } inline InetAddress &operator=(const struct sockaddr_storage *ss) { if (reinterpret_cast(ss) != this) - memcpy(this,ss,sizeof(InetAddress)); + ZT_FAST_MEMCPY(this,ss,sizeof(InetAddress)); return *this; } @@ -137,7 +137,7 @@ struct InetAddress : public sockaddr_storage { if (reinterpret_cast(&sa) != this) { memset(this,0,sizeof(InetAddress)); - memcpy(this,&sa,sizeof(struct sockaddr_in)); + ZT_FAST_MEMCPY(this,&sa,sizeof(struct sockaddr_in)); } return *this; } @@ -146,7 +146,7 @@ struct InetAddress : public sockaddr_storage { if (reinterpret_cast(sa) != this) { memset(this,0,sizeof(InetAddress)); - memcpy(this,sa,sizeof(struct sockaddr_in)); + ZT_FAST_MEMCPY(this,sa,sizeof(struct sockaddr_in)); } return *this; } @@ -155,7 +155,7 @@ struct InetAddress : public sockaddr_storage { if (reinterpret_cast(&sa) != this) { memset(this,0,sizeof(InetAddress)); - memcpy(this,&sa,sizeof(struct sockaddr_in6)); + ZT_FAST_MEMCPY(this,&sa,sizeof(struct sockaddr_in6)); } return *this; } @@ -164,7 +164,7 @@ struct InetAddress : public sockaddr_storage { if (reinterpret_cast(sa) != this) { memset(this,0,sizeof(InetAddress)); - memcpy(this,sa,sizeof(struct sockaddr_in6)); + ZT_FAST_MEMCPY(this,sa,sizeof(struct sockaddr_in6)); } return *this; } @@ -175,10 +175,10 @@ struct InetAddress : public sockaddr_storage memset(this,0,sizeof(InetAddress)); switch(sa.sa_family) { case AF_INET: - memcpy(this,&sa,sizeof(struct sockaddr_in)); + ZT_FAST_MEMCPY(this,&sa,sizeof(struct sockaddr_in)); break; case AF_INET6: - memcpy(this,&sa,sizeof(struct sockaddr_in6)); + ZT_FAST_MEMCPY(this,&sa,sizeof(struct sockaddr_in6)); break; } } @@ -191,10 +191,10 @@ struct InetAddress : public sockaddr_storage memset(this,0,sizeof(InetAddress)); switch(sa->sa_family) { case AF_INET: - memcpy(this,sa,sizeof(struct sockaddr_in)); + ZT_FAST_MEMCPY(this,sa,sizeof(struct sockaddr_in)); break; case AF_INET6: - memcpy(this,sa,sizeof(struct sockaddr_in6)); + ZT_FAST_MEMCPY(this,sa,sizeof(struct sockaddr_in6)); break; } } @@ -388,7 +388,7 @@ struct InetAddress : public sockaddr_storage break; case AF_INET6: r.ss_family = AF_INET6; - memcpy(reinterpret_cast(&r)->sin6_addr.s6_addr,reinterpret_cast(this)->sin6_addr.s6_addr,16); + ZT_FAST_MEMCPY(reinterpret_cast(&r)->sin6_addr.s6_addr,reinterpret_cast(this)->sin6_addr.s6_addr,16); break; } return r; @@ -537,12 +537,12 @@ struct InetAddress : public sockaddr_storage return (unsigned int)(b.template at(p) + 3); // other addresses begin with 16-bit non-inclusive length case 0x04: ss_family = AF_INET; - memcpy(&(reinterpret_cast(this)->sin_addr.s_addr),b.field(p,4),4); p += 4; + ZT_FAST_MEMCPY(&(reinterpret_cast(this)->sin_addr.s_addr),b.field(p,4),4); p += 4; reinterpret_cast(this)->sin_port = Utils::hton(b.template at(p)); p += 2; break; case 0x06: ss_family = AF_INET6; - memcpy(reinterpret_cast(this)->sin6_addr.s6_addr,b.field(p,16),16); p += 16; + ZT_FAST_MEMCPY(reinterpret_cast(this)->sin6_addr.s6_addr,b.field(p,16),16); p += 16; reinterpret_cast(this)->sin_port = Utils::hton(b.template at(p)); p += 2; break; default: diff --git a/node/Network.cpp b/node/Network.cpp index a9e8539e7..e8e91174c 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -983,7 +983,7 @@ uint64_t Network::handleConfigChunk(void *tPtr,const uint64_t packetId,const Add return false; c->haveChunkIds[c->haveChunks++] = chunkId; - memcpy(c->data.unsafeData() + chunkIndex,chunkData,chunkLen); + ZT_FAST_MEMCPY(c->data.unsafeData() + chunkIndex,chunkData,chunkLen); c->haveBytes += chunkLen; if (c->haveBytes == totalLength) { @@ -1385,7 +1385,7 @@ void Network::_externalConfig(ZT_VirtualNetworkConfig *ec) const ec->assignedAddressCount = 0; for(unsigned int i=0;iassignedAddresses[i]),&(_config.staticIps[i]),sizeof(struct sockaddr_storage)); + ZT_FAST_MEMCPY(&(ec->assignedAddresses[i]),&(_config.staticIps[i]),sizeof(struct sockaddr_storage)); ++ec->assignedAddressCount; } else { memset(&(ec->assignedAddresses[i]),0,sizeof(struct sockaddr_storage)); @@ -1395,7 +1395,7 @@ void Network::_externalConfig(ZT_VirtualNetworkConfig *ec) const ec->routeCount = 0; for(unsigned int i=0;iroutes[i]),&(_config.routes[i]),sizeof(ZT_VirtualNetworkRoute)); + ZT_FAST_MEMCPY(&(ec->routes[i]),&(_config.routes[i]),sizeof(ZT_VirtualNetworkRoute)); ++ec->routeCount; } else { memset(&(ec->routes[i]),0,sizeof(ZT_VirtualNetworkRoute)); diff --git a/node/NetworkConfig.hpp b/node/NetworkConfig.hpp index bb48e6aa7..19823e248 100644 --- a/node/NetworkConfig.hpp +++ b/node/NetworkConfig.hpp @@ -234,12 +234,12 @@ public: NetworkConfig(const NetworkConfig &nc) { - memcpy(this,&nc,sizeof(NetworkConfig)); + ZT_FAST_MEMCPY(this,&nc,sizeof(NetworkConfig)); } inline NetworkConfig &operator=(const NetworkConfig &nc) { - memcpy(this,&nc,sizeof(NetworkConfig)); + ZT_FAST_MEMCPY(this,&nc,sizeof(NetworkConfig)); return *this; } diff --git a/node/Node.cpp b/node/Node.cpp index ef8925d56..8d8f5ca0f 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -66,7 +66,7 @@ Node::Node(void *uptr,void *tptr,const struct ZT_Node_Callbacks *callbacks,int64 { if (callbacks->version != 0) throw ZT_EXCEPTION_INVALID_ARGUMENT; - memcpy(&_cb,callbacks,sizeof(ZT_Node_Callbacks)); + ZT_FAST_MEMCPY(&_cb,callbacks,sizeof(ZT_Node_Callbacks)); // Initialize non-cryptographic PRNG from a good random source Utils::getSecureRandom((void *)_prngState,sizeof(_prngState)); @@ -441,7 +441,7 @@ ZT_PeerList *Node::peers() const SharedPtr bestp(pi->second->getBestPath(_now,false)); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { - memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); + ZT_FAST_MEMCPY(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); p->paths[p->pathCount].lastSend = (*path)->lastOut(); p->paths[p->pathCount].lastReceive = (*path)->lastIn(); p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address()); diff --git a/node/OutboundMulticast.cpp b/node/OutboundMulticast.cpp index 04ba2c2a6..9aa447365 100644 --- a/node/OutboundMulticast.cpp +++ b/node/OutboundMulticast.cpp @@ -78,7 +78,7 @@ void OutboundMulticast::init( if (!disableCompression) _packet.compress(); - memcpy(_frameData,payload,_frameLen); + ZT_FAST_MEMCPY(_frameData,payload,_frameLen); } void OutboundMulticast::sendOnly(const RuntimeEnvironment *RR,void *tPtr,const Address &toAddr) diff --git a/node/Packet.cpp b/node/Packet.cpp index d3f7dfd64..af42cda58 100644 --- a/node/Packet.cpp +++ b/node/Packet.cpp @@ -387,27 +387,27 @@ static void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = val static inline U16 LZ4_read16(const void* memPtr) { - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; + U16 val; ZT_FAST_MEMCPY(&val, memPtr, sizeof(val)); return val; } static inline U32 LZ4_read32(const void* memPtr) { - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; + U32 val; ZT_FAST_MEMCPY(&val, memPtr, sizeof(val)); return val; } static inline reg_t LZ4_read_ARCH(const void* memPtr) { - reg_t val; memcpy(&val, memPtr, sizeof(val)); return val; + reg_t val; ZT_FAST_MEMCPY(&val, memPtr, sizeof(val)); return val; } static inline void LZ4_write16(void* memPtr, U16 value) { - memcpy(memPtr, &value, sizeof(value)); + ZT_FAST_MEMCPY(memPtr, &value, sizeof(value)); } static inline void LZ4_write32(void* memPtr, U32 value) { - memcpy(memPtr, &value, sizeof(value)); + ZT_FAST_MEMCPY(memPtr, &value, sizeof(value)); } #endif /* LZ4_FORCE_MEMORY_ACCESS */ @@ -435,7 +435,7 @@ static inline void LZ4_writeLE16(void* memPtr, U16 value) static inline void LZ4_copy8(void* dst, const void* src) { - memcpy(dst,src,8); + ZT_FAST_MEMCPY(dst,src,8); } /* customized variant of memcpy, which can overwrite up to 8 bytes beyond dstEnd */ @@ -830,7 +830,7 @@ _last_literals: } else { *op++ = (BYTE)(lastRun< oend))) goto _output_error; /* Error : input must be consumed */ } - memcpy(op, ip, length); + ZT_FAST_MEMCPY(op, ip, length); ip += length; op += length; break; /* Necessarily EOF, due to parsing restrictions */ @@ -995,14 +995,14 @@ FORCE_INLINE int LZ4_decompress_generic( /* match encompass external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix-match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + ZT_FAST_MEMCPY(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op-lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; } else { - memcpy(op, lowPrefix, restSize); + ZT_FAST_MEMCPY(op, lowPrefix, restSize); op += restSize; } } continue; @@ -1017,7 +1017,7 @@ FORCE_INLINE int LZ4_decompress_generic( op[2] = match[2]; op[3] = match[3]; match += dec32table[offset]; - memcpy(op+4, match, 4); + ZT_FAST_MEMCPY(op+4, match, 4); match -= dec64; } else { LZ4_copy8(op, match); match+=8; } op += 8; @@ -1095,7 +1095,7 @@ void Packet::armor(const void *key,bool encryptPayload,unsigned int counter) s20.crypt12(payload,payload,payloadLen); uint64_t mac[2]; Poly1305::compute(mac,payload,payloadLen,macKey); - memcpy(data + ZT_PACKET_IDX_MAC,mac,8); + ZT_FAST_MEMCPY(data + ZT_PACKET_IDX_MAC,mac,8); } } @@ -1167,7 +1167,7 @@ bool Packet::compress() if ((cl > 0)&&(cl < pl)) { data[ZT_PACKET_IDX_VERB] |= (char)ZT_PROTO_VERB_FLAG_COMPRESSED; setSize((unsigned int)cl + ZT_PACKET_IDX_PAYLOAD); - memcpy(data + ZT_PACKET_IDX_PAYLOAD,buf,cl); + ZT_FAST_MEMCPY(data + ZT_PACKET_IDX_PAYLOAD,buf,cl); return true; } } @@ -1187,7 +1187,7 @@ bool Packet::uncompress() int ucl = LZ4_decompress_safe((const char *)data + ZT_PACKET_IDX_PAYLOAD,buf,compLen,sizeof(buf)); if ((ucl > 0)&&(ucl <= (int)(capacity() - ZT_PACKET_IDX_PAYLOAD))) { setSize((unsigned int)ucl + ZT_PACKET_IDX_PAYLOAD); - memcpy(data + ZT_PACKET_IDX_PAYLOAD,buf,ucl); + ZT_FAST_MEMCPY(data + ZT_PACKET_IDX_PAYLOAD,buf,ucl); } else { return false; } diff --git a/node/Path.hpp b/node/Path.hpp index ab52ced6d..008b05c5f 100644 --- a/node/Path.hpp +++ b/node/Path.hpp @@ -73,10 +73,10 @@ public: _k[1] = (uint64_t)reinterpret_cast(&r)->sin_port; _k[2] = (uint64_t)l; } else if (r.ss_family == AF_INET6) { - memcpy(_k,reinterpret_cast(&r)->sin6_addr.s6_addr,16); + ZT_FAST_MEMCPY(_k,reinterpret_cast(&r)->sin6_addr.s6_addr,16); _k[2] = ((uint64_t)reinterpret_cast(&r)->sin6_port << 32) ^ (uint64_t)l; } else { - memcpy(_k,&r,std::min(sizeof(_k),sizeof(InetAddress))); + ZT_FAST_MEMCPY(_k,&r,std::min(sizeof(_k),sizeof(InetAddress))); _k[2] += (uint64_t)l; } } diff --git a/node/Revocation.hpp b/node/Revocation.hpp index 7f7498bbf..4a18914f0 100644 --- a/node/Revocation.hpp +++ b/node/Revocation.hpp @@ -166,7 +166,7 @@ public: if (b[p++] == 1) { if (b.template at(p) == ZT_C25519_SIGNATURE_LEN) { p += 2; - memcpy(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); + ZT_FAST_MEMCPY(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; } else throw ZT_EXCEPTION_INVALID_SERIALIZED_DATA_INVALID_CRYPTOGRAPHIC_TOKEN; } else { diff --git a/node/Tag.hpp b/node/Tag.hpp index fc1377de1..394c7be6f 100644 --- a/node/Tag.hpp +++ b/node/Tag.hpp @@ -163,7 +163,7 @@ public: if (b.template at(p) != ZT_C25519_SIGNATURE_LEN) throw ZT_EXCEPTION_INVALID_SERIALIZED_DATA_INVALID_CRYPTOGRAPHIC_TOKEN; p += 2; - memcpy(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; + ZT_FAST_MEMCPY(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; } else { p += 2 + b.template at(p); } diff --git a/node/Utils.hpp b/node/Utils.hpp index 87584fcfd..20108e271 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -40,6 +40,44 @@ #include "Constants.hpp" +// So it's 2017 and this still helps on most Linux versions. It shouldn't but it does. Go figure. +#if defined(__LINUX__) && ((defined(_MSC_VER) || defined(__GNUC__)) && (defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__) || defined(__AMD64) || defined(__AMD64__) || defined(_M_X64))) +#include +static inline void ZT_FAST_MEMCPY(void *a,const void *b,unsigned long k) +{ + char *aa = reinterpret_cast(a); + const char *bb = reinterpret_cast(b); + while (likely(k >= 128)) { + __m128i t1 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + __m128i t2 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + __m128i t3 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + __m128i t4 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t1); aa += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t2); aa += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t3); aa += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t4); aa += 16; + __m128i t5 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + __m128i t6 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + __m128i t7 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + __m128i t8 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t5); aa += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t6); aa += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t7); aa += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t8); aa += 16; + k -= 128; + } + while (likely(k >= 16)) { + __m128i t1 = _mm_loadu_si128(reinterpret_cast(bb)); bb += 16; + _mm_storeu_si128(reinterpret_cast<__m128i *>(aa),t1); aa += 16; + k -= 16; + } + for(unsigned long i=0;i(p); p += 8; _ts = b.template at(p); p += 8; - memcpy(_updatesMustBeSignedBy.data,b.field(p,ZT_C25519_PUBLIC_KEY_LEN),ZT_C25519_PUBLIC_KEY_LEN); p += ZT_C25519_PUBLIC_KEY_LEN; - memcpy(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; + ZT_FAST_MEMCPY(_updatesMustBeSignedBy.data,b.field(p,ZT_C25519_PUBLIC_KEY_LEN),ZT_C25519_PUBLIC_KEY_LEN); p += ZT_C25519_PUBLIC_KEY_LEN; + ZT_FAST_MEMCPY(_signature.data,b.field(p,ZT_C25519_SIGNATURE_LEN),ZT_C25519_SIGNATURE_LEN); p += ZT_C25519_SIGNATURE_LEN; const unsigned int numRoots = (unsigned int)b[p++]; if (numRoots > ZT_WORLD_MAX_ROOTS) throw ZT_EXCEPTION_INVALID_SERIALIZED_DATA_OVERFLOW; diff --git a/selftest.cpp b/selftest.cpp index c3b8ce2f0..86894564f 100644 --- a/selftest.cpp +++ b/selftest.cpp @@ -660,6 +660,50 @@ static int testOther() std::cout << " " << InetAddress("").toString(buf); std::cout << std::endl; + std::cout << "[other] Benchmarking memcpy... "; std::cout.flush(); + { + unsigned char *bb = (unsigned char *)::malloc(1234567); + unsigned char *cc = (unsigned char *)::malloc(1234567); + for(unsigned int i=0;i<1234567;++i) + bb[i] = (unsigned char)i; + double bytes = 0.0; + uint64_t start = OSUtils::now(); + for(unsigned int i=0;i<20000;++i) { + ++bb[i]; + ++bb[i+1]; + memcpy(cc,bb,1234567); + bytes += 1234567.0; + } + if (cc[0] != bb[0]) + abort(); + uint64_t end = OSUtils::now(); + std::cout << ((bytes / 1048576.0) / ((long double)(end - start) / 1024.0)) << " MiB/second" << std::endl; + ::free((void *)bb); + ::free((void *)cc); + } + + std::cout << "[other] Benchmarking ZT_FAST_MEMCPY... "; std::cout.flush(); + { + unsigned char *bb = (unsigned char *)::malloc(1234567); + unsigned char *cc = (unsigned char *)::malloc(1234567); + for(unsigned int i=0;i<1234567;++i) + bb[i] = (unsigned char)i; + double bytes = 0.0; + uint64_t start = OSUtils::now(); + for(unsigned int i=0;i<20000;++i) { + ++bb[0]; + ++bb[1234566]; + ZT_FAST_MEMCPY(cc,bb,1234567); + bytes += 1234567.0; + } + if (cc[0] != bb[0]) + abort(); + uint64_t end = OSUtils::now(); + std::cout << ((bytes / 1048576.0) / ((long double)(end - start) / 1024.0)) << " MiB/second" << std::endl; + ::free((void *)bb); + ::free((void *)cc); + } + #if 0 std::cout << "[other] Testing Hashtable... "; std::cout.flush(); { diff --git a/service/OneService.cpp b/service/OneService.cpp index b30dc390c..3ca75362e 100644 --- a/service/OneService.cpp +++ b/service/OneService.cpp @@ -1971,7 +1971,7 @@ public: // After setting up tap, fall through to CONFIG_UPDATE since we also want to do this... case ZT_VIRTUAL_NETWORK_CONFIG_OPERATION_CONFIG_UPDATE: - memcpy(&(n.config),nwc,sizeof(ZT_VirtualNetworkConfig)); + ZT_FAST_MEMCPY(&(n.config),nwc,sizeof(ZT_VirtualNetworkConfig)); if (n.tap) { // sanity check #ifdef __WINDOWS__ // wait for up to 5 seconds for the WindowsEthernetTap to actually be initialized @@ -2298,7 +2298,7 @@ public: else return 0; const std::vector *l = lh->get(ztaddr); if ((l)&&(l->size() > 0)) { - memcpy(result,&((*l)[(unsigned long)_node->prng() % l->size()]),sizeof(struct sockaddr_storage)); + ZT_FAST_MEMCPY(result,&((*l)[(unsigned long)_node->prng() % l->size()]),sizeof(struct sockaddr_storage)); return 1; } else return 0; }