From 1c9ca73065975b137deb6770b4624886942c2605 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 15 Apr 2015 13:09:20 -0700 Subject: [PATCH] Fix some deadlock issues, move awareness of broadcast subscription into core, other bug fixes. --- include/ZeroTierOne.h | 23 ++++++++-------- node/Network.cpp | 48 ++++++++++++++++++++------------- node/Network.hpp | 20 +++++++++----- node/Node.cpp | 63 ++++++++++++++++++------------------------- node/Node.hpp | 6 ++--- node/Switch.cpp | 8 +++--- service/One.cpp | 45 ++++++++++++------------------- 7 files changed, 105 insertions(+), 108 deletions(-) diff --git a/include/ZeroTierOne.h b/include/ZeroTierOne.h index 6372641a1..45ed626ac 100644 --- a/include/ZeroTierOne.h +++ b/include/ZeroTierOne.h @@ -395,7 +395,7 @@ enum ZT1_VirtualNetworkConfigOperation }; /** - * Virtual LAN configuration + * Virtual network configuration */ typedef struct { @@ -448,9 +448,6 @@ typedef struct /** * If nonzero, this network supports and allows broadcast (ff:ff:ff:ff:ff:ff) traffic - * - * This is really just a hint to user code. If this is true, the user can - * subscribe to the broadcast group. If not, then the user shouldn't. */ int broadcastEnabled; @@ -630,6 +627,10 @@ typedef void ZT1_Node; * The supplied config pointer is not guaranteed to remain valid, so make * a copy if you want one. * + * This should not call multicastSubscribe() or other network-modifying + * methods, as this could cause a deadlock in multithreaded or interrupt + * driven environments. + * * This must return 0 on success. It can return any OS-dependent error code * on failure, and this results in the network being placed into the * PORT_ERROR state. @@ -759,7 +760,7 @@ void ZT1_Node_delete(ZT1_Node *node); * @param linkDesperation Link desperation metric for link or protocol over which packet arrived * @param packetData Packet data * @param packetLength Packet length - * @param nextBackgroundTaskDeadline Value/result: set to deadline for next call to one of the three processXXX() methods + * @param nextBackgroundTaskDeadline Value/result: set to deadline for next call to processBackgroundTasks() * @return OK (0) or error code if a fatal error condition has occurred */ enum ZT1_ResultCode ZT1_Node_processWirePacket( @@ -769,7 +770,7 @@ enum ZT1_ResultCode ZT1_Node_processWirePacket( unsigned int linkDesperation, const void *packetData, unsigned int packetLength, - uint64_t *nextBackgroundTaskDeadline); + volatile uint64_t *nextBackgroundTaskDeadline); /** * Process a frame from a virtual network port (tap) @@ -783,7 +784,7 @@ enum ZT1_ResultCode ZT1_Node_processWirePacket( * @param vlanId 10-bit VLAN ID or 0 if none * @param frameData Frame payload data * @param frameLength Frame payload length - * @param nextBackgroundTaskDeadline Value/result: set to deadline for next call to one of the three processXXX() methods + * @param nextBackgroundTaskDeadline Value/result: set to deadline for next call to processBackgroundTasks() * @return OK (0) or error code if a fatal error condition has occurred */ enum ZT1_ResultCode ZT1_Node_processVirtualNetworkFrame( @@ -796,17 +797,17 @@ enum ZT1_ResultCode ZT1_Node_processVirtualNetworkFrame( unsigned int vlanId, const void *frameData, unsigned int frameLength, - uint64_t *nextBackgroundTaskDeadline); + volatile uint64_t *nextBackgroundTaskDeadline); /** - * Perform required periodic operations even if no new frames or packets have arrived + * Perform periodic background operations * * @param node Node instance * @param now Current clock in milliseconds - * @param nextBackgroundTaskDeadline Value/result: set to deadline for next call to one of the three processXXX() methods + * @param nextBackgroundTaskDeadline Value/result: set to deadline for next call to processBackgroundTasks() * @return OK (0) or error code if a fatal error condition has occurred */ -enum ZT1_ResultCode ZT1_Node_processBackgroundTasks(ZT1_Node *node,uint64_t now,uint64_t *nextBackgroundTaskDeadline); +enum ZT1_ResultCode ZT1_Node_processBackgroundTasks(ZT1_Node *node,uint64_t now,volatile uint64_t *nextBackgroundTaskDeadline); /** * Join a network diff --git a/node/Network.cpp b/node/Network.cpp index ba0ee9841..7fa17ef15 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -47,6 +47,7 @@ Network::Network(const RuntimeEnvironment *renv,uint64_t nwid) : _id(nwid), _mac(renv->identity.address(),nwid), _enabled(true), + _portInitialized(false), _lastConfigUpdate(0), _destroyed(false), _netconfFailure(NETCONF_FAILURE_NONE), @@ -84,7 +85,6 @@ Network::Network(const RuntimeEnvironment *renv,uint64_t nwid) : const char *e = p + mcdb.length(); if (!memcmp("ZTMCD0",p,6)) { p += 6; - Mutex::Lock _l(_lock); while (p != e) { CertificateOfMembership com; com.deserialize2(p,e); @@ -97,9 +97,12 @@ Network::Network(const RuntimeEnvironment *renv,uint64_t nwid) : } catch ( ... ) {} // ignore invalid MCDB, we'll re-learn from peers } - ZT1_VirtualNetworkConfig ctmp; - _externalConfig(&ctmp); - _portError = RR->node->configureVirtualNetworkPort(_id,ZT1_VIRTUAL_NETWORK_CONFIG_OPERATION_UP,&ctmp); + if (!_portInitialized) { + ZT1_VirtualNetworkConfig ctmp; + _externalConfig(&ctmp); + _portError = RR->node->configureVirtualNetworkPort(_id,ZT1_VIRTUAL_NETWORK_CONFIG_OPERATION_UP,&ctmp); + _portInitialized = true; + } } Network::~Network() @@ -145,6 +148,8 @@ std::vector Network::allMulticastGroups() const if (!std::binary_search(mgs.begin(),oldend,i->first)) mgs.push_back(i->first); } + if ((_config)&&(_config->enableBroadcast())) + mgs.push_back(Network::BROADCAST); std::sort(mgs.begin(),mgs.end()); return mgs; } @@ -161,11 +166,13 @@ bool Network::subscribedToMulticastGroup(const MulticastGroup &mg,bool includeBr void Network::multicastSubscribe(const MulticastGroup &mg) { - Mutex::Lock _l(_lock); - if (std::binary_search(_myMulticastGroups.begin(),_myMulticastGroups.end(),mg)) - return; - _myMulticastGroups.push_back(mg); - std::sort(_myMulticastGroups.begin(),_myMulticastGroups.end()); + { + Mutex::Lock _l(_lock); + if (std::binary_search(_myMulticastGroups.begin(),_myMulticastGroups.end(),mg)) + return; + _myMulticastGroups.push_back(mg); + std::sort(_myMulticastGroups.begin(),_myMulticastGroups.end()); + } _announceMulticastGroups(); } @@ -183,19 +190,22 @@ void Network::multicastUnsubscribe(const MulticastGroup &mg) bool Network::applyConfiguration(const SharedPtr &conf) { - Mutex::Lock _l(_lock); - if (_destroyed) + if (_destroyed) // sanity check return false; try { if ((conf->networkId() == _id)&&(conf->issuedTo() == RR->identity.address())) { - _config = conf; - _lastConfigUpdate = RR->node->now(); - _netconfFailure = NETCONF_FAILURE_NONE; - ZT1_VirtualNetworkConfig ctmp; - _externalConfig(&ctmp); - _portError = RR->node->configureVirtualNetworkPort(_id,ZT1_VIRTUAL_NETWORK_CONFIG_OPERATION_CONFIG_UPDATE,&ctmp); - + bool portInitialized; + { + Mutex::Lock _l(_lock); + _config = conf; + _lastConfigUpdate = RR->node->now(); + _netconfFailure = NETCONF_FAILURE_NONE; + _externalConfig(&ctmp); + portInitialized = _portInitialized; + _portInitialized = true; + } + _portError = RR->node->configureVirtualNetworkPort(_id,(portInitialized) ? ZT1_VIRTUAL_NETWORK_CONFIG_OPERATION_CONFIG_UPDATE : ZT1_VIRTUAL_NETWORK_CONFIG_OPERATION_UP,&ctmp); return true; } else { TRACE("ignored invalid configuration for network %.16llx (configuration contains mismatched network ID or issued-to address)",(unsigned long long)_id); @@ -462,7 +472,7 @@ ZT1_VirtualNetworkStatus Network::_status() const case NETCONF_FAILURE_NOT_FOUND: return ZT1_NETWORK_STATUS_NOT_FOUND; case NETCONF_FAILURE_NONE: - return ((_lastConfigUpdate > 0) ? ZT1_NETWORK_STATUS_OK : ZT1_NETWORK_STATUS_REQUESTING_CONFIGURATION); + return ((_config) ? ZT1_NETWORK_STATUS_OK : ZT1_NETWORK_STATUS_REQUESTING_CONFIGURATION); default: return ZT1_NETWORK_STATUS_PORT_ERROR; } diff --git a/node/Network.hpp b/node/Network.hpp index 08d9e5278..213b44c59 100644 --- a/node/Network.hpp +++ b/node/Network.hpp @@ -65,6 +65,16 @@ class Network : NonCopyable public: /** + * Broadcast multicast group: ff:ff:ff:ff:ff:ff / 0 + */ + static const MulticastGroup BROADCAST; + + /** + * Construct a new network + * + * Note that init() should be called immediately after the network is + * constructed to actually configure the port. + * * @param renv Runtime environment * @param nwid Network ID */ @@ -72,11 +82,6 @@ public: ~Network(); - /** - * Broadcast multicast group: ff:ff:ff:ff:ff:ff / 0 - */ - static const MulticastGroup BROADCAST; - /** * @return Network ID */ @@ -348,7 +353,8 @@ private: const RuntimeEnvironment *RR; uint64_t _id; MAC _mac; // local MAC address - bool _enabled; + volatile bool _enabled; + volatile bool _portInitialized; std::vector< MulticastGroup > _myMulticastGroups; // multicast groups that we belong to including those behind us (updated periodically) std::map< MulticastGroup,uint64_t > _multicastGroupsBehindMe; // multicast groups bridged to us and when we last saw activity on each @@ -370,7 +376,7 @@ private: NETCONF_FAILURE_NOT_FOUND, NETCONF_FAILURE_INIT_FAILED } _netconfFailure; - int _portError; // return value from port config callback + volatile int _portError; // return value from port config callback Mutex _lock; diff --git a/node/Node.cpp b/node/Node.cpp index 6cdfc650b..c6ae54bdd 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -152,16 +152,10 @@ ZT1_ResultCode Node::processWirePacket( unsigned int linkDesperation, const void *packetData, unsigned int packetLength, - uint64_t *nextBackgroundTaskDeadline) + volatile uint64_t *nextBackgroundTaskDeadline) { - if (now >= *nextBackgroundTaskDeadline) { - ZT1_ResultCode rc = processBackgroundTasks(now,nextBackgroundTaskDeadline); - if (rc != ZT1_RESULT_OK) - return rc; - } else _now = now; - + _now = now; RR->sw->onRemotePacket(*(reinterpret_cast(remoteAddress)),linkDesperation,packetData,packetLength); - return ZT1_RESULT_OK; } @@ -174,20 +168,14 @@ ZT1_ResultCode Node::processVirtualNetworkFrame( unsigned int vlanId, const void *frameData, unsigned int frameLength, - uint64_t *nextBackgroundTaskDeadline) + volatile uint64_t *nextBackgroundTaskDeadline) { - if (now >= *nextBackgroundTaskDeadline) { - ZT1_ResultCode rc = processBackgroundTasks(now,nextBackgroundTaskDeadline); - if (rc != ZT1_RESULT_OK) - return rc; - } else _now = now; - - SharedPtr nw(network(nwid)); - if (nw) + _now = now; + SharedPtr nw(this->network(nwid)); + if (nw) { RR->sw->onLocalEthernet(nw,MAC(sourceMac),MAC(destMac),etherType,vlanId,frameData,frameLength); - else return ZT1_RESULT_ERROR_NETWORK_NOT_FOUND; - - return ZT1_RESULT_OK; + return ZT1_RESULT_OK; + } else return ZT1_RESULT_ERROR_NETWORK_NOT_FOUND; } class _PingPeersThatNeedPing @@ -217,7 +205,7 @@ private: std::vector
_supernodes; }; -ZT1_ResultCode Node::processBackgroundTasks(uint64_t now,uint64_t *nextBackgroundTaskDeadline) +ZT1_ResultCode Node::processBackgroundTasks(uint64_t now,volatile uint64_t *nextBackgroundTaskDeadline) { _now = now; Mutex::Lock bl(_backgroundTasksLock); @@ -260,6 +248,7 @@ ZT1_ResultCode Node::processBackgroundTasks(uint64_t now,uint64_t *nextBackgroun *(reinterpret_cast(beacon)) = RR->prng->next32(); *(reinterpret_cast(beacon + 4)) = RR->prng->next32(); RR->identity.address().copyTo(beacon + 8,5); + RR->antiRec->logOutgoingZT(beacon,13); putPacket(ZT_DEFAULTS.v4Broadcast,beacon,13,0); } } @@ -292,9 +281,9 @@ ZT1_ResultCode Node::processBackgroundTasks(uint64_t now,uint64_t *nextBackgroun ZT1_ResultCode Node::join(uint64_t nwid) { Mutex::Lock _l(_networks_m); - SharedPtr &nw = _networks[nwid]; - if (!nw) - nw = SharedPtr(new Network(RR,nwid)); + SharedPtr &nwe = _networks[nwid]; + if (!nwe) + nwe = SharedPtr(new Network(RR,nwid)); return ZT1_RESULT_OK; } @@ -311,20 +300,20 @@ ZT1_ResultCode Node::leave(uint64_t nwid) ZT1_ResultCode Node::multicastSubscribe(uint64_t nwid,uint64_t multicastGroup,unsigned long multicastAdi) { - Mutex::Lock _l(_networks_m); - std::map< uint64_t,SharedPtr >::iterator nw(_networks.find(nwid)); - if (nw != _networks.end()) - nw->second->multicastSubscribe(MulticastGroup(MAC(multicastGroup),(uint32_t)(multicastAdi & 0xffffffff))); - return ZT1_RESULT_OK; + SharedPtr nw(this->network(nwid)); + if (nw) { + nw->multicastSubscribe(MulticastGroup(MAC(multicastGroup),(uint32_t)(multicastAdi & 0xffffffff))); + return ZT1_RESULT_OK; + } else return ZT1_RESULT_ERROR_NETWORK_NOT_FOUND; } ZT1_ResultCode Node::multicastUnsubscribe(uint64_t nwid,uint64_t multicastGroup,unsigned long multicastAdi) { - Mutex::Lock _l(_networks_m); - std::map< uint64_t,SharedPtr >::iterator nw(_networks.find(nwid)); - if (nw != _networks.end()) - nw->second->multicastUnsubscribe(MulticastGroup(MAC(multicastGroup),(uint32_t)(multicastAdi & 0xffffffff))); - return ZT1_RESULT_OK; + SharedPtr nw(this->network(nwid)); + if (nw) { + nw->multicastUnsubscribe(MulticastGroup(MAC(multicastGroup),(uint32_t)(multicastAdi & 0xffffffff))); + return ZT1_RESULT_OK; + } else return ZT1_RESULT_ERROR_NETWORK_NOT_FOUND; } uint64_t Node::address() const @@ -531,7 +520,7 @@ enum ZT1_ResultCode ZT1_Node_processWirePacket( unsigned int linkDesperation, const void *packetData, unsigned int packetLength, - uint64_t *nextBackgroundTaskDeadline) + volatile uint64_t *nextBackgroundTaskDeadline) { try { return reinterpret_cast(node)->processWirePacket(now,remoteAddress,linkDesperation,packetData,packetLength,nextBackgroundTaskDeadline); @@ -553,7 +542,7 @@ enum ZT1_ResultCode ZT1_Node_processVirtualNetworkFrame( unsigned int vlanId, const void *frameData, unsigned int frameLength, - uint64_t *nextBackgroundTaskDeadline) + volatile uint64_t *nextBackgroundTaskDeadline) { try { return reinterpret_cast(node)->processVirtualNetworkFrame(now,nwid,sourceMac,destMac,etherType,vlanId,frameData,frameLength,nextBackgroundTaskDeadline); @@ -564,7 +553,7 @@ enum ZT1_ResultCode ZT1_Node_processVirtualNetworkFrame( } } -enum ZT1_ResultCode ZT1_Node_processBackgroundTasks(ZT1_Node *node,uint64_t now,uint64_t *nextBackgroundTaskDeadline) +enum ZT1_ResultCode ZT1_Node_processBackgroundTasks(ZT1_Node *node,uint64_t now,volatile uint64_t *nextBackgroundTaskDeadline) { try { return reinterpret_cast(node)->processBackgroundTasks(now,nextBackgroundTaskDeadline); diff --git a/node/Node.hpp b/node/Node.hpp index cd8914e6a..429e51716 100644 --- a/node/Node.hpp +++ b/node/Node.hpp @@ -83,7 +83,7 @@ public: unsigned int linkDesperation, const void *packetData, unsigned int packetLength, - uint64_t *nextBackgroundTaskDeadline); + volatile uint64_t *nextBackgroundTaskDeadline); ZT1_ResultCode processVirtualNetworkFrame( uint64_t now, uint64_t nwid, @@ -93,8 +93,8 @@ public: unsigned int vlanId, const void *frameData, unsigned int frameLength, - uint64_t *nextBackgroundTaskDeadline); - ZT1_ResultCode processBackgroundTasks(uint64_t now,uint64_t *nextBackgroundTaskDeadline); + volatile uint64_t *nextBackgroundTaskDeadline); + ZT1_ResultCode processBackgroundTasks(uint64_t now,volatile uint64_t *nextBackgroundTaskDeadline); ZT1_ResultCode join(uint64_t nwid); ZT1_ResultCode leave(uint64_t nwid); ZT1_ResultCode multicastSubscribe(uint64_t nwid,uint64_t multicastGroup,unsigned long multicastAdi); diff --git a/node/Switch.cpp b/node/Switch.cpp index 3710158db..6bc044c0e 100644 --- a/node/Switch.cpp +++ b/node/Switch.cpp @@ -153,7 +153,7 @@ void Switch::onLocalEthernet(const SharedPtr &network,const MAC &from,c return; } - //TRACE("%.16llx: MULTICAST %s -> %s %s %u",network->id(),from.toString().c_str(),mg.toString().c_str(),etherTypeName(etherType),len); + TRACE("%.16llx: MULTICAST %s -> %s %s %u",network->id(),from.toString().c_str(),mg.toString().c_str(),etherTypeName(etherType),len); RR->mc->send( ((!nconf->isPublic())&&(nconf->com())) ? &(nconf->com()) : (const CertificateOfMembership *)0, @@ -171,7 +171,7 @@ void Switch::onLocalEthernet(const SharedPtr &network,const MAC &from,c } if (to[0] == MAC::firstOctetForNetwork(network->id())) { - // Destination is another ZeroTier peer + // Destination is another ZeroTier peer on the same network Address toZT(to.toAddress(network->id())); if (network->isAllowed(toZT)) { @@ -203,8 +203,10 @@ void Switch::onLocalEthernet(const SharedPtr &network,const MAC &from,c outp.compress(); send(outp,true); } + + TRACE("%.16llx: UNICAST: %s -> %s etherType==%s(%.4x) vlanId==%u len==%u fromBridged==%d",network->id(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType),etherType,vlanId,len,(int)fromBridged); } else { - TRACE("%.16llx: UNICAST: %s -> %s %s dropped, destination not a member of private network",network->id(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType)); + TRACE("%.16llx: UNICAST: %s -> %s etherType==%s dropped, destination not a member of private network",network->id(),from.toString().c_str(),to.toString().c_str(),etherTypeName(etherType)); } return; diff --git a/service/One.cpp b/service/One.cpp index bc42bba8a..287f78131 100644 --- a/service/One.cpp +++ b/service/One.cpp @@ -228,19 +228,22 @@ public: uint64_t dl = _nextBackgroundTaskDeadline; uint64_t now = OSUtils::now(); - if (dl <= now) { - if ((now - lastTapMulticastGroupCheck) >= ZT_TAP_CHECK_MULTICAST_INTERVAL) { - lastTapMulticastGroupCheck = now; - Mutex::Lock _l(_taps_m); - for(std::map< uint64_t,EthernetTap *>::const_iterator t(_taps.begin());t!=_taps.end();++t) - _updateMulticastGroups(t->first,t->second); - } - _node->processBackgroundTasks(now,&_nextBackgroundTaskDeadline); - dl = _nextBackgroundTaskDeadline; - now = OSUtils::now(); + } + + if ((now - lastTapMulticastGroupCheck) >= ZT_TAP_CHECK_MULTICAST_INTERVAL) { + lastTapMulticastGroupCheck = now; + Mutex::Lock _l(_taps_m); + for(std::map< uint64_t,EthernetTap *>::const_iterator t(_taps.begin());t!=_taps.end();++t) { + std::vector added,removed; + t->second->scanMulticastGroups(added,removed); + for(std::vector::iterator m(added.begin());m!=added.end();++m) + _node->multicastSubscribe(t->first,m->mac().toInt(),m->adi()); + for(std::vector::iterator m(removed.begin());m!=removed.end();++m) + _node->multicastUnsubscribe(t->first,m->mac().toInt(),m->adi()); + } } const unsigned long delay = (dl > now) ? (unsigned long)(dl - now) : 100; @@ -409,7 +412,7 @@ public: StapFrameHandler, (void *)this))).first; } catch ( ... ) { - return -999; + return -999; // tap init failed } } // fall through... @@ -432,11 +435,8 @@ public: t->second->removeIp(*ip); } assignedIps.swap(newAssignedIps); - - _updateMulticastGroups(t->first,t->second); - if (nwc->broadcastEnabled) - _node->multicastSubscribe(nwid,0xffffffffffffULL,0); - else _node->multicastUnsubscribe(nwid,0xffffffffffffULL,0); + } else { + return -999; // tap init failed } break; case ZT1_VIRTUAL_NETWORK_CONFIG_OPERATION_DOWN: @@ -627,17 +627,6 @@ private: return p; } - void _updateMulticastGroups(uint64_t nwid,EthernetTap *tap) - { - // assumes _taps_m is locked - std::vector added,removed; - tap->scanMulticastGroups(added,removed); - for(std::vector::iterator m(added.begin());m!=added.end();++m) - _node->multicastSubscribe(nwid,m->mac().toInt(),m->adi()); - for(std::vector::iterator m(removed.begin());m!=removed.end();++m) - _node->multicastUnsubscribe(nwid,m->mac().toInt(),m->adi()); - } - const std::string _homePath; Phy _phy; NetworkConfigMaster *_master; @@ -648,7 +637,7 @@ private: PhySocket *_v4TcpListenSocket; PhySocket *_v6TcpListenSocket; ControlPlane *_controlPlane; - uint64_t _nextBackgroundTaskDeadline; + volatile uint64_t _nextBackgroundTaskDeadline; std::map< uint64_t,EthernetTap * > _taps; std::map< uint64_t,std::vector > _tapAssignedIps; // ZeroTier assigned IPs, not user or dhcp assigned