Such ping logic. So edge case.

This commit is contained in:
Adam Ierymenko 2014-04-03 14:36:52 -07:00
parent b1088a6bd7
commit c96d3ebf8c
5 changed files with 82 additions and 27 deletions

View file

@ -263,7 +263,7 @@ error_no_byte_order_defined;
#define ZT_NETWORK_FINGERPRINT_CHECK_DELAY 5000 #define ZT_NETWORK_FINGERPRINT_CHECK_DELAY 5000
/** /**
* Delay between pings (actually HELLOs) to direct links * Delay between ordinary case pings of direct links
*/ */
#define ZT_PEER_DIRECT_PING_DELAY 120000 #define ZT_PEER_DIRECT_PING_DELAY 120000
@ -297,12 +297,12 @@ error_no_byte_order_defined;
* *
* A link that hasn't spoken in this long is simply considered inactive. * A link that hasn't spoken in this long is simply considered inactive.
*/ */
#define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 1000) #define ZT_PEER_PATH_ACTIVITY_TIMEOUT ((ZT_PEER_DIRECT_PING_DELAY * 2) + 10000)
/** /**
* Close TCP tunnels if unused for this long * Close TCP tunnels if unused for this long
*/ */
#define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT ZT_PEER_PATH_ACTIVITY_TIMEOUT #define ZT_TCP_TUNNEL_ACTIVITY_TIMEOUT (ZT_PEER_PATH_ACTIVITY_TIMEOUT + 10000)
/** /**
* Try TCP tunnels if nothing received for this long * Try TCP tunnels if nothing received for this long
@ -314,6 +314,11 @@ error_no_byte_order_defined;
*/ */
#define ZT_STARTUP_AGGRO 5000 #define ZT_STARTUP_AGGRO 5000
/**
 * Time since a ping was sent to be considered unanswered
 *
 * pingUnanswered() treats the newest ping as unanswered once it is at
 * least this much later than the newest packet received on any path.
 * NOTE(review): presumably milliseconds, like the other delays defined
 * alongside it -- confirm.
 */
#define ZT_PING_UNANSWERED_AFTER 2500
/** /**
* Stop relaying via peers that have not responded to direct sends in this long * Stop relaying via peers that have not responded to direct sends in this long
*/ */

View file

@ -536,10 +536,10 @@ Node::ReasonForTermination Node::run()
uint64_t lastNetworkAutoconfCheck = Utils::now() - 5000ULL; // check autoconf again after 5s for startup uint64_t lastNetworkAutoconfCheck = Utils::now() - 5000ULL; // check autoconf again after 5s for startup
uint64_t lastPingCheck = 0; uint64_t lastPingCheck = 0;
uint64_t lastSupernodePing = 0;
uint64_t lastClean = Utils::now(); // don't need to do this immediately uint64_t lastClean = Utils::now(); // don't need to do this immediately
uint64_t lastNetworkFingerprintCheck = 0; uint64_t lastNetworkFingerprintCheck = 0;
uint64_t lastMulticastCheck = 0; uint64_t lastMulticastCheck = 0;
uint64_t lastSupernodePingCheck = 0;
long lastDelayDelta = 0; long lastDelayDelta = 0;
uint64_t networkConfigurationFingerprint = 0; uint64_t networkConfigurationFingerprint = 0;
@ -592,13 +592,9 @@ Node::ReasonForTermination Node::run()
/* Ping supernodes separately, and do so more aggressively if we haven't /* Ping supernodes separately, and do so more aggressively if we haven't
* heard anything from anyone since our last resynchronize / startup. */ * heard anything from anyone since our last resynchronize / startup. */
if ( ((now - lastSupernodePing) >= ZT_PEER_DIRECT_PING_DELAY) || if ((now - lastSupernodePingCheck) >= ZT_STARTUP_AGGRO) {
((_r->timeOfLastResynchronize > _r->timeOfLastPacketReceived) && ((now - lastSupernodePing) >= ZT_STARTUP_AGGRO)) ) { lastSupernodePingCheck = now;
lastSupernodePing = now; _r->topology->eachSupernodePeer(Topology::PingSupernodesThatNeedPing(_r,now));
std::vector< SharedPtr<Peer> > sns(_r->topology->supernodePeers());
TRACE("pinging %d supernodes",(int)sns.size());
for(std::vector< SharedPtr<Peer> >::const_iterator p(sns.begin());p!=sns.end();++p)
(*p)->sendPing(_r,now);
} }
if (resynchronize) { if (resynchronize) {

View file

@ -216,7 +216,7 @@ bool Peer::isTcpFailoverTime(const RuntimeEnvironment *_r,uint64_t now) const
void Peer::clean(uint64_t now) void Peer::clean(uint64_t now)
{ {
Mutex::Lock _l(_lock); Mutex::Lock _l(_lock);
unsigned long i = 0,o = 0,l = _paths.size(); unsigned long i = 0,o = 0,l = (unsigned long)_paths.size();
while (i != l) { while (i != l) {
if (_paths[i].active(now)) if (_paths[i].active(now))
_paths[o++] = _paths[i]; _paths[o++] = _paths[i];

View file

@ -226,6 +226,26 @@ public:
return x; return x;
} }
/**
* @param _r Runtime environment
* @param now Current time
* @return True if the last ping is unanswered
*/
inline bool pingUnanswered(const RuntimeEnvironment *_r,uint64_t now)
throw()
{
uint64_t lp = 0;
uint64_t lr = 0;
{
Mutex::Lock _l(_lock);
for(std::vector<Path>::const_iterator p(_paths.begin());p!=_paths.end();++p) {
lp = std::max(p->lastPing(),lp);
lr = std::max(p->lastReceived(),lr);
}
}
return ( (lp > _r->timeOfLastResynchronize) && ((lr < lp)&&((lp - lr) >= ZT_PING_UNANSWERED_AFTER)) );
}
/** /**
* @return Time of most recent unicast frame received * @return Time of most recent unicast frame received
*/ */

View file

@ -180,6 +180,20 @@ public:
f(*this,p->second); f(*this,p->second);
} }
/**
* Apply a function or function object to all supernode peers
*
* @param f Function to apply
* @tparam F Function or function object type
*/
template<typename F>
inline void eachSupernodePeer(F f)
{
Mutex::Lock _l(_supernodes_m);
for(std::vector< SharedPtr<Peer> >::const_iterator p(_supernodePeers.begin());p!=_supernodePeers.end();++p)
f(*this,*p);
}
/** /**
* Function object to collect peers that need a firewall opener sent * Function object to collect peers that need a firewall opener sent
*/ */
@ -214,20 +228,16 @@ public:
inline void operator()(Topology &t,const SharedPtr<Peer> &p) inline void operator()(Topology &t,const SharedPtr<Peer> &p)
{ {
if ( /* For ordinary nodes we ping if they've sent us a frame recently,
/* 1: we have not heard anything directly in ZT_PEER_DIRECT_PING_DELAY ms */ * otherwise they are stale and we let the link die.
((_now - p->lastDirectReceive()) >= ZT_PEER_DIRECT_PING_DELAY) && *
/* 2: */ * Note that we measure ping time from time of last receive rather
( * than time of last send in order to only count full round trips. */
/* 2a: peer has direct path, and has sent us something recently */ if ( (!_supernodeAddresses.count(p->address())) &&
( ((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT) &&
(p->hasDirectPath())&& ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY) ) {
((_now - p->lastFrame()) < ZT_PEER_PATH_ACTIVITY_TIMEOUT) p->sendPing(_r,_now);
) && }
/* 2b: peer is not a supernode */
(!_supernodeAddresses.count(p->address()))
)
) { p->sendPing(_r,_now); }
} }
private: private:
@ -236,6 +246,30 @@ public:
const RuntimeEnvironment *_r; const RuntimeEnvironment *_r;
}; };
/**
 * Function object that pings supernode peers under the (slightly more
 * aggressive) supernode rules
 */
class PingSupernodesThatNeedPing
{
public:
	/**
	 * @param renv Runtime environment handed to the peer calls
	 * @param now Current time used for every peer visited
	 */
	PingSupernodesThatNeedPing(const RuntimeEnvironment *renv,uint64_t now) throw() :
		_now(now),
		_r(renv)
	{
	}

	/**
	 * Ping one supernode peer if it is due
	 *
	 * @param t Topology invoking this functor (unused)
	 * @param p Peer to examine
	 */
	inline void operator()(Topology &t,const SharedPtr<Peer> &p)
	{
		/* Supernodes get pinged whether or not any frames have been seen,
		 * and again right away while a ping remains unanswered. How often
		 * that can happen is capped by the main loop, which runs this check
		 * no more than once per ZT_STARTUP_AGGRO. */
		bool pingDue = p->pingUnanswered(_r,_now);
		if (!pingDue) {
			// Only consulted when the unanswered check did not already decide
			pingDue = ((_now - p->lastDirectReceive()) > ZT_PEER_DIRECT_PING_DELAY);
		}

		if (pingDue)
			p->sendPing(_r,_now);
	}

private:
	uint64_t _now;
	const RuntimeEnvironment *_r;
};
/** /**
* Function object to forget direct links to active peers and then ping them indirectly * Function object to forget direct links to active peers and then ping them indirectly
* *