From 7fbe2f7adf3575f3a21fc1ab3a5a2a036e18e6e2 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Mon, 2 Nov 2015 15:38:53 -0800 Subject: [PATCH] Tweak some more timings for better reliability. --- node/Cluster.hpp | 2 +- node/Constants.hpp | 12 ++++++------ node/Node.cpp | 2 +- node/Peer.hpp | 4 ++-- node/SelfAwareness.cpp | 2 +- node/Switch.cpp | 6 +++--- node/Topology.hpp | 9 ++++++--- tests/http/big-test-start.sh | 4 ++-- 8 files changed, 22 insertions(+), 19 deletions(-) diff --git a/node/Cluster.hpp b/node/Cluster.hpp index f1caa436d..ee2209998 100644 --- a/node/Cluster.hpp +++ b/node/Cluster.hpp @@ -55,7 +55,7 @@ /** * How often should we announce that we have a peer? */ -#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD (ZT_PEER_DIRECT_PING_DELAY / 2) +#define ZT_CLUSTER_HAVE_PEER_ANNOUNCE_PERIOD ZT_PEER_DIRECT_PING_DELAY /** * Desired period between doPeriodicTasks() in milliseconds diff --git a/node/Constants.hpp b/node/Constants.hpp index bb62484d0..552688a62 100644 --- a/node/Constants.hpp +++ b/node/Constants.hpp @@ -266,16 +266,16 @@ */ #define ZT_PEER_DIRECT_PING_DELAY 60000 +/** + * Timeout for overall peer activity (measured from last receive) + */ +#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 4) + ZT_PING_CHECK_INVERVAL) + /** * Delay between requests for updated network autoconf information */ #define ZT_NETWORK_AUTOCONF_DELAY 60000 -/** - * Timeout for overall peer activity (measured from last receive) - */ -#define ZT_PEER_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 3) + (ZT_PING_CHECK_INVERVAL * 2)) - /** * Minimum interval between attempts by relays to unite peers * @@ -283,7 +283,7 @@ * a RENDEZVOUS message no more than this often. This instructs the peers * to attempt NAT-t and gives each the other's corresponding IP:port pair. */ -#define ZT_MIN_UNITE_INTERVAL 60000 +#define ZT_MIN_UNITE_INTERVAL 30000 /** * Delay between initial direct NAT-t packet and more aggressive techniques diff --git a/node/Node.cpp b/node/Node.cpp index 74acc869b..82cb7ddbc 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -263,7 +263,7 @@ public: } lastReceiveFromUpstream = std::max(p->lastReceive(),lastReceiveFromUpstream); - } else if (p->alive(_now)) { + } else if (p->activelyTransferringFrames(_now)) { // Normal nodes get their preferred link kept alive if the node has generated frame traffic recently p->doPingAndKeepalive(RR,_now,0); } diff --git a/node/Peer.hpp b/node/Peer.hpp index e5db3bde5..ad4c67463 100644 --- a/node/Peer.hpp +++ b/node/Peer.hpp @@ -231,9 +231,9 @@ public: inline uint64_t lastAnnouncedTo() const throw() { return _lastAnnouncedTo; } /** - * @return True if peer has received an actual data frame within ZT_PEER_ACTIVITY_TIMEOUT milliseconds + * @return True if this peer is actively sending real network frames */ - inline uint64_t alive(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); } + inline uint64_t activelyTransferringFrames(uint64_t now) const throw() { return ((now - lastFrame()) < ZT_PEER_ACTIVITY_TIMEOUT); } /** * @return Current latency or 0 if unknown (max: 65535) diff --git a/node/SelfAwareness.cpp b/node/SelfAwareness.cpp index d8eca0718..ce75eb03e 100644 --- a/node/SelfAwareness.cpp +++ b/node/SelfAwareness.cpp @@ -128,7 +128,7 @@ void SelfAwareness::iam(const Address &reporter,const InetAddress &reporterPhysi // links to be re-established if possible, possibly using a root server or some // other relay. for(std::vector< SharedPtr >::const_iterator p(rset.peersReset.begin());p!=rset.peersReset.end();++p) { - if ((*p)->alive(now)) { + if ((*p)->activelyTransferringFrames(now)) { Packet outp((*p)->address(),RR->identity.address(),Packet::VERB_NOP); RR->sw->send(outp,true,0); } diff --git a/node/Switch.cpp b/node/Switch.cpp index 2f72f57af..120ce7a4d 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -442,8 +442,8 @@ unsigned long Switch::doTimerTasks(uint64_t now) Mutex::Lock _l(_contactQueue_m); for(std::list::iterator qi(_contactQueue.begin());qi!=_contactQueue.end();) { if (now >= qi->fireAtTime) { - if ((!qi->peer->alive(now))||(qi->peer->hasActiveDirectPath(now))) { - // Cancel attempt if we've already connected or peer is no longer "alive" + if (qi->peer->hasActiveDirectPath(now)) { + // Cancel if connection has succeeded _contactQueue.erase(qi++); continue; } else { @@ -539,7 +539,7 @@ unsigned long Switch::doTimerTasks(uint64_t now) _LastUniteKey *k = (_LastUniteKey *)0; uint64_t *v = (uint64_t *)0; while (i.next(k,v)) { - if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 16)) + if ((now - *v) >= (ZT_MIN_UNITE_INTERVAL * 8)) _lastUniteAttempt.erase(*k); } } diff --git a/node/Topology.hpp b/node/Topology.hpp index 4c1a2ab37..a0c28b0fe 100644 --- a/node/Topology.hpp +++ b/node/Topology.hpp @@ -81,6 +81,11 @@ public: /** * Get a peer only if it is presently in memory (no disk cache) * + * This also does not update the lastUsed() time for peers, which means + * that it won't prevent them from falling out of RAM. This is currently + * used in the Cluster code to update peer info without forcing all peers + * across the entire cluster to remain in memory cache. + * * @param zta ZeroTier address * @param now Current time */ @@ -88,10 +93,8 @@ public: { Mutex::Lock _l(_lock); const SharedPtr *const ap = _peers.get(zta); - if (ap) { - (*ap)->use(now); + if (ap) return *ap; - } return SharedPtr(); } diff --git a/tests/http/big-test-start.sh b/tests/http/big-test-start.sh index 43166c6eb..f300ac612 100755 --- a/tests/http/big-test-start.sh +++ b/tests/http/big-test-start.sh @@ -1,7 +1,7 @@ #!/bin/bash # Edit as needed -- note that >1000 per host is likely problematic due to Linux kernel limits -NUM_CONTAINERS=25 +NUM_CONTAINERS=50 CONTAINER_IMAGE=zerotier/http-test # @@ -25,6 +25,6 @@ export PATH=/bin:/usr/bin:/usr/local/bin:/usr/sbin:/sbin # docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE #done -pssh -h big-test-hosts -i -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done" +pssh -h big-test-hosts -o big-test-out -t 0 -p 256 "for ((n=0;n<$NUM_CONTAINERS;n++)); do docker run --device=/dev/net/tun --privileged -d $CONTAINER_IMAGE; sleep 0.25; done" exit 0