From 94f4316a0ecb56f7e34422a7dfed73efb2bbe1a3 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Mon, 9 Nov 2015 14:54:05 -0800 Subject: [PATCH] Fix for possible high CPU usage on multicast queries. --- node/IncomingPacket.cpp | 13 ++++++------- node/Node.cpp | 22 ++++++---------------- node/RuntimeEnvironment.hpp | 11 ++++++----- tests/http/big-test-start.sh | 3 ++- 4 files changed, 20 insertions(+), 29 deletions(-) diff --git a/node/IncomingPacket.cpp b/node/IncomingPacket.cpp index d655b8561..cffa0b9af 100644 --- a/node/IncomingPacket.cpp +++ b/node/IncomingPacket.cpp @@ -57,9 +57,8 @@ bool IncomingPacket::tryDecode(const RuntimeEnvironment *RR,bool deferred) if ((cipher() == ZT_PROTO_CIPHER_SUITE__C25519_POLY1305_NONE)&&(verb() == Packet::VERB_HELLO)) { // Unencrypted HELLOs require some potentially expensive verification, so // do this in the background if background processing is enabled. - DeferredPackets *const dp = RR->dp; // read volatile pointer - if ((dp)&&(!deferred)) { - dp->enqueue(this); + if ((RR->dpEnabled > 0)&&(!deferred)) { + RR->dp->enqueue(this); return true; // 'handled' via deferring to background thread(s) } else { // A null pointer for peer to _doHELLO() tells it to run its own @@ -405,12 +404,12 @@ bool IncomingPacket::_doOK(const RuntimeEnvironment *RR,const SharedPtr &p } break; case Packet::VERB_WHOIS: { - /* Right now only root servers are allowed to send OK(WHOIS) to prevent - * poisoning attacks. Further decentralization will require some other - * kind of trust mechanism. */ if (RR->topology->isRoot(peer->identity())) { const Identity id(*this,ZT_PROTO_VERB_WHOIS__OK__IDX_IDENTITY); - if (id.locallyValidate()) + // Right now we can skip this since OK(WHOIS) is only accepted from + // roots. In the future it should be done if we query less trusted + // sources. + //if (id.locallyValidate()) RR->sw->doAnythingWaitingForPeer(RR->topology->addPeer(SharedPtr(new Peer(RR->identity,id)))); } } break; diff --git a/node/Node.cpp b/node/Node.cpp index 6ba038eba..31757b458 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -116,7 +116,9 @@ Node::Node( RR->antiRec = new AntiRecursion(); RR->topology = new Topology(RR); RR->sa = new SelfAwareness(RR); + RR->dp = new DeferredPackets(RR); } catch ( ... ) { + delete RR->dp; delete RR->sa; delete RR->topology; delete RR->antiRec; @@ -131,14 +133,11 @@ Node::Node( Node::~Node() { Mutex::Lock _l(_networks_m); - Mutex::Lock _l2(RR->dpSetLock); _networks.clear(); // ensure that networks are destroyed before shutdow - DeferredPackets *dp = RR->dp; - RR->dp = (DeferredPackets *)0; - delete dp; - + RR->dpEnabled = 0; + delete RR->dp; delete RR->sa; delete RR->topology; delete RR->antiRec; @@ -647,23 +646,14 @@ void Node::clusterStatus(ZT_ClusterStatus *cs) void Node::backgroundThreadMain() { - RR->dpSetLock.lock(); - if (!RR->dp) { - try { - RR->dp = new DeferredPackets(RR); - } catch ( ... ) { // sanity check -- could only really happen if out of memory - RR->dpSetLock.unlock(); - return; - } - } - RR->dpSetLock.unlock(); - + ++RR->dpEnabled; for(;;) { try { if (RR->dp->process() < 0) break; } catch ( ... ) {} // sanity check -- should not throw } + --RR->dpEnabled; } /****************************************************************************/ diff --git a/node/RuntimeEnvironment.hpp b/node/RuntimeEnvironment.hpp index 18d9e8e5f..10cc6ec03 100644 --- a/node/RuntimeEnvironment.hpp +++ b/node/RuntimeEnvironment.hpp @@ -57,12 +57,12 @@ public: node(n) ,identity() ,localNetworkController((NetworkController *)0) - ,dp((DeferredPackets *)0) ,sw((Switch *)0) ,mc((Multicaster *)0) ,antiRec((AntiRecursion *)0) ,topology((Topology *)0) ,sa((SelfAwareness *)0) + ,dp((DeferredPackets *)0) #ifdef ZT_ENABLE_CLUSTER ,cluster((Cluster *)0) #endif @@ -80,10 +80,6 @@ public: // This is set externally to an instance of this base class NetworkController *localNetworkController; - // This is created if background threads call Node::backgroundThreadMain(). - DeferredPackets *volatile dp; // can be read without lock but not written - Mutex dpSetLock; - /* * Order matters a bit here. These are constructed in this order * and then deleted in the opposite order on Node exit. The order ensures @@ -97,10 +93,15 @@ public: AntiRecursion *antiRec; Topology *topology; SelfAwareness *sa; + DeferredPackets *dp; #ifdef ZT_ENABLE_CLUSTER Cluster *cluster; #endif + + // This is set to >0 if background threads are waiting on deferred + // packets, otherwise 'dp' should not be used. + volatile int dpEnabled; }; } // namespace ZeroTier diff --git a/tests/http/big-test-start.sh b/tests/http/big-test-start.sh index b2782ac2e..1b462285e 100755 --- a/tests/http/big-test-start.sh +++ b/tests/http/big-test-start.sh @@ -4,9 +4,10 @@ # 250 with a 16GB RAM VM like Amazon m4.xlarge seems good NUM_CONTAINERS=250 CONTAINER_IMAGE=zerotier/http-test +SCALE_UP_DELAY=4 export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin -pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 10; done" +pssh -h big-test-hosts -x '-t -t' -i -OUserKnownHostsFile=/dev/null -OStrictHostKeyChecking=no -t 0 -p 256 "sudo sysctl -w net.netfilter.nf_conntrack_max=262144 ; for ((n=0;n<$NUM_CONTAINERS;n++)); do sudo docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep $SCALE_UP_DELAY; done" exit 0