Add a bunch of fine-grained try/catch safety guards in the Node main loop to attempt to snare a gremlin.

This commit is contained in:
Adam Ierymenko 2014-11-21 11:54:06 -08:00
parent 959f611a65
commit 0bcac1fd79

View file

@ -607,46 +607,74 @@ Node::ReasonForTermination Node::run()
} }
// Update network configurations when needed. // Update network configurations when needed.
if ((resynchronize)||((now - lastNetworkAutoconfCheck) >= ZT_NETWORK_AUTOCONF_CHECK_DELAY)) { try {
lastNetworkAutoconfCheck = now; if ((resynchronize)||((now - lastNetworkAutoconfCheck) >= ZT_NETWORK_AUTOCONF_CHECK_DELAY)) {
std::vector< SharedPtr<Network> > nets(RR->nc->networks()); lastNetworkAutoconfCheck = now;
for(std::vector< SharedPtr<Network> >::iterator n(nets.begin());n!=nets.end();++n) { std::vector< SharedPtr<Network> > nets(RR->nc->networks());
if ((now - (*n)->lastConfigUpdate()) >= ZT_NETWORK_AUTOCONF_DELAY) for(std::vector< SharedPtr<Network> >::iterator n(nets.begin());n!=nets.end();++n) {
(*n)->requestConfiguration(); if ((now - (*n)->lastConfigUpdate()) >= ZT_NETWORK_AUTOCONF_DELAY)
(*n)->requestConfiguration();
}
} }
} catch ( ... ) {
LOG("unexpected exception updating network configurations (non-fatal, will retry)");
} }
// Do periodic tasks in submodules. // Do periodic tasks in submodules.
if ((now - lastClean) >= ZT_DB_CLEAN_PERIOD) { if ((now - lastClean) >= ZT_DB_CLEAN_PERIOD) {
lastClean = now; lastClean = now;
RR->topology->clean(now); try {
RR->mc->clean(now); RR->topology->clean(now);
RR->nc->clean(); } catch ( ... ) {
if (RR->updater) LOG("unexpected exception in Topology::clean() (non-fatal)");
RR->updater->checkIfMaxIntervalExceeded(now); }
try {
RR->mc->clean(now);
} catch ( ... ) {
LOG("unexpected exception in Multicaster::clean() (non-fatal)");
}
try {
RR->nc->clean();
} catch ( ... ) {
LOG("unexpected exception in NodeConfig::clean() (non-fatal)");
}
try {
if (RR->updater)
RR->updater->checkIfMaxIntervalExceeded(now);
} catch ( ... ) {
LOG("unexpected exception in SoftwareUpdater::checkIfMaxIntervalExceeded() (non-fatal)");
}
} }
// Send beacons to physical local LANs // Send beacons to physical local LANs
if ((resynchronize)||((now - lastBeacon) >= ZT_BEACON_INTERVAL)) { try {
lastBeacon = now; if ((resynchronize)||((now - lastBeacon) >= ZT_BEACON_INTERVAL)) {
char bcn[ZT_PROTO_BEACON_LENGTH]; lastBeacon = now;
void *bcnptr = bcn; char bcn[ZT_PROTO_BEACON_LENGTH];
*((uint32_t *)(bcnptr)) = RR->prng->next32(); void *bcnptr = bcn;
bcnptr = bcn + 4; *((uint32_t *)(bcnptr)) = RR->prng->next32();
*((uint32_t *)(bcnptr)) = RR->prng->next32(); bcnptr = bcn + 4;
RR->identity.address().copyTo(bcn + ZT_PROTO_BEACON_IDX_ADDRESS,ZT_ADDRESS_LENGTH); *((uint32_t *)(bcnptr)) = RR->prng->next32();
TRACE("sending LAN beacon to %s",ZT_DEFAULTS.v4Broadcast.toString().c_str()); RR->identity.address().copyTo(bcn + ZT_PROTO_BEACON_IDX_ADDRESS,ZT_ADDRESS_LENGTH);
RR->antiRec->logOutgoingZT(bcn,ZT_PROTO_BEACON_LENGTH); TRACE("sending LAN beacon to %s",ZT_DEFAULTS.v4Broadcast.toString().c_str());
RR->sm->send(ZT_DEFAULTS.v4Broadcast,false,false,bcn,ZT_PROTO_BEACON_LENGTH); RR->antiRec->logOutgoingZT(bcn,ZT_PROTO_BEACON_LENGTH);
RR->sm->send(ZT_DEFAULTS.v4Broadcast,false,false,bcn,ZT_PROTO_BEACON_LENGTH);
}
} catch ( ... ) {
LOG("unexpected exception sending LAN beacon (non-fatal)");
} }
// Check for updates to root topology (supernodes) periodically // Check for updates to root topology (supernodes) periodically
if ((now - lastRootTopologyFetch) >= ZT_UPDATE_ROOT_TOPOLOGY_CHECK_INTERVAL) { try {
lastRootTopologyFetch = now; if ((now - lastRootTopologyFetch) >= ZT_UPDATE_ROOT_TOPOLOGY_CHECK_INTERVAL) {
if (!impl->disableRootTopologyUpdates) { lastRootTopologyFetch = now;
TRACE("fetching root topology from %s",ZT_DEFAULTS.rootTopologyUpdateURL.c_str()); if (!impl->disableRootTopologyUpdates) {
RR->http->GET(ZT_DEFAULTS.rootTopologyUpdateURL,HttpClient::NO_HEADERS,60,&_cbHandleGetRootTopology,RR); TRACE("fetching root topology from %s",ZT_DEFAULTS.rootTopologyUpdateURL.c_str());
RR->http->GET(ZT_DEFAULTS.rootTopologyUpdateURL,HttpClient::NO_HEADERS,60,&_cbHandleGetRootTopology,RR);
}
} }
} catch ( ... ) {
LOG("unexpected exception attempting to check for root topology updates (non-fatal)");
} }
// Sleep for loop interval or until something interesting happens. // Sleep for loop interval or until something interesting happens.