Add fine-grained try/catch guards throughout the Node main loop to help track down an elusive intermittent bug (a "gremlin").

This commit is contained in:
Adam Ierymenko 2014-11-21 11:54:06 -08:00
parent 959f611a65
commit 0bcac1fd79

View file

@@ -607,6 +607,7 @@ Node::ReasonForTermination Node::run()
} }
// Update network configurations when needed. // Update network configurations when needed.
try {
if ((resynchronize)||((now - lastNetworkAutoconfCheck) >= ZT_NETWORK_AUTOCONF_CHECK_DELAY)) { if ((resynchronize)||((now - lastNetworkAutoconfCheck) >= ZT_NETWORK_AUTOCONF_CHECK_DELAY)) {
lastNetworkAutoconfCheck = now; lastNetworkAutoconfCheck = now;
std::vector< SharedPtr<Network> > nets(RR->nc->networks()); std::vector< SharedPtr<Network> > nets(RR->nc->networks());
@@ -615,18 +616,38 @@ Node::ReasonForTermination Node::run()
(*n)->requestConfiguration(); (*n)->requestConfiguration();
} }
} }
} catch ( ... ) {
LOG("unexpected exception updating network configurations (non-fatal, will retry)");
}
// Do periodic tasks in submodules. // Do periodic tasks in submodules.
if ((now - lastClean) >= ZT_DB_CLEAN_PERIOD) { if ((now - lastClean) >= ZT_DB_CLEAN_PERIOD) {
lastClean = now; lastClean = now;
try {
RR->topology->clean(now); RR->topology->clean(now);
} catch ( ... ) {
LOG("unexpected exception in Topology::clean() (non-fatal)");
}
try {
RR->mc->clean(now); RR->mc->clean(now);
} catch ( ... ) {
LOG("unexpected exception in Multicaster::clean() (non-fatal)");
}
try {
RR->nc->clean(); RR->nc->clean();
} catch ( ... ) {
LOG("unexpected exception in NodeConfig::clean() (non-fatal)");
}
try {
if (RR->updater) if (RR->updater)
RR->updater->checkIfMaxIntervalExceeded(now); RR->updater->checkIfMaxIntervalExceeded(now);
} catch ( ... ) {
LOG("unexpected exception in SoftwareUpdater::checkIfMaxIntervalExceeded() (non-fatal)");
}
} }
// Send beacons to physical local LANs // Send beacons to physical local LANs
try {
if ((resynchronize)||((now - lastBeacon) >= ZT_BEACON_INTERVAL)) { if ((resynchronize)||((now - lastBeacon) >= ZT_BEACON_INTERVAL)) {
lastBeacon = now; lastBeacon = now;
char bcn[ZT_PROTO_BEACON_LENGTH]; char bcn[ZT_PROTO_BEACON_LENGTH];
@@ -639,8 +660,12 @@ Node::ReasonForTermination Node::run()
RR->antiRec->logOutgoingZT(bcn,ZT_PROTO_BEACON_LENGTH); RR->antiRec->logOutgoingZT(bcn,ZT_PROTO_BEACON_LENGTH);
RR->sm->send(ZT_DEFAULTS.v4Broadcast,false,false,bcn,ZT_PROTO_BEACON_LENGTH); RR->sm->send(ZT_DEFAULTS.v4Broadcast,false,false,bcn,ZT_PROTO_BEACON_LENGTH);
} }
} catch ( ... ) {
LOG("unexpected exception sending LAN beacon (non-fatal)");
}
// Check for updates to root topology (supernodes) periodically // Check for updates to root topology (supernodes) periodically
try {
if ((now - lastRootTopologyFetch) >= ZT_UPDATE_ROOT_TOPOLOGY_CHECK_INTERVAL) { if ((now - lastRootTopologyFetch) >= ZT_UPDATE_ROOT_TOPOLOGY_CHECK_INTERVAL) {
lastRootTopologyFetch = now; lastRootTopologyFetch = now;
if (!impl->disableRootTopologyUpdates) { if (!impl->disableRootTopologyUpdates) {
@@ -648,6 +673,9 @@ Node::ReasonForTermination Node::run()
RR->http->GET(ZT_DEFAULTS.rootTopologyUpdateURL,HttpClient::NO_HEADERS,60,&_cbHandleGetRootTopology,RR); RR->http->GET(ZT_DEFAULTS.rootTopologyUpdateURL,HttpClient::NO_HEADERS,60,&_cbHandleGetRootTopology,RR);
} }
} }
} catch ( ... ) {
LOG("unexpected exception attempting to check for root topology updates (non-fatal)");
}
// Sleep for loop interval or until something interesting happens. // Sleep for loop interval or until something interesting happens.
try { try {