From de6fadc12dca615dde185e48b01f5cacd951104a Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Wed, 7 Apr 2021 19:31:53 -0400 Subject: [PATCH] Fix a bug in Topology path GC and make it more efficient. --- core/Node.cpp | 27 +++++++++++++-------------- core/SharedPtr.hpp | 19 +++++++++++-------- core/Topology.cpp | 39 ++++++++++++++++++++------------------- 3 files changed, 44 insertions(+), 41 deletions(-) diff --git a/core/Node.cpp b/core/Node.cpp index dbcb194ee..ff10bfd78 100644 --- a/core/Node.cpp +++ b/core/Node.cpp @@ -211,15 +211,16 @@ ZT_ResultCode Node::processBackgroundTasks( try { Vector< SharedPtr< Peer > > allPeers, rootPeers; m_ctx.topology->allPeers(allPeers, rootPeers); + std::sort(rootPeers.begin(), rootPeers.end()); bool online = false; for (Vector< SharedPtr< Peer > >::iterator p(allPeers.begin()); p != allPeers.end(); ++p) { - const bool isRoot = std::find(rootPeers.begin(), rootPeers.end(), *p) != rootPeers.end(); + const bool isRoot = std::binary_search(rootPeers.begin(), rootPeers.end(), *p); (*p)->pulse(m_ctx, cc, isRoot); online |= ((isRoot || rootPeers.empty()) && (*p)->directlyConnected(cc)); } - if (m_online.exchange(online) != online) + if (m_online.exchange(online, std::memory_order_relaxed) != online) postEvent(cc.tPtr, online ? ZT_EVENT_ONLINE : ZT_EVENT_OFFLINE); ZT_SPEW("ranking roots..."); @@ -346,9 +347,9 @@ struct p_ZT_PeerListPrivate : public ZT_PeerList { // Actual containers for the memory, hidden from external users. 
Vector< ZT_Peer > p_peers; - List< Vector< ZT_Path > > p_paths; - List< Identity > p_identities; - List< Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > > p_locators; + ForwardList< Vector< ZT_Path > > p_paths; + ForwardList< Identity > p_identities; + ForwardList< Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > > p_locators; }; static void p_peerListFreeFunction(const void *pl) @@ -381,9 +382,9 @@ ZT_PeerList *Node::peers(const CallContext &cc) const Peer &pp = **pi; p.address = pp.address(); - pl->p_identities.push_back(pp.identity()); - p.identity = reinterpret_cast<const ZT_Identity *>(&(pl->p_identities.back())); - p.fingerprint = &(pl->p_identities.back().fingerprint()); + pl->p_identities.push_front(pp.identity()); + p.identity = reinterpret_cast<const ZT_Identity *>(&(pl->p_identities.front())); + p.fingerprint = &(pl->p_identities.front().fingerprint()); if (pp.remoteVersionKnown()) { p.versionMajor = (int)pp.remoteVersionMajor(); p.versionMinor = (int)pp.remoteVersionMinor(); @@ -404,8 +405,8 @@ ZT_PeerList *Node::peers(const CallContext &cc) const Vector< SharedPtr< Path > > ztPaths; pp.getAllPaths(ztPaths); if (ztPaths.empty()) { - pl->p_paths.push_back(Vector< ZT_Path >()); - std::vector< ZT_Path > &apiPaths = pl->p_paths.back(); + pl->p_paths.push_front(Vector< ZT_Path >()); + std::vector< ZT_Path > &apiPaths = pl->p_paths.front(); apiPaths.resize(ztPaths.size()); for (unsigned long i = 0; i < (unsigned long)ztPaths.size(); ++i) { SharedPtr< Path > &ztp = ztPaths[i]; @@ -426,8 +427,8 @@ ZT_PeerList *Node::peers(const CallContext &cc) const const SharedPtr< const Locator > loc(pp.locator()); if (loc) { - pl->p_locators.push_back(Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX >()); - Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > &lb = pl->p_locators.back(); + pl->p_locators.push_front(Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX >()); + Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > &lb = pl->p_locators.front(); Utils::zero< ZT_LOCATOR_MARSHAL_SIZE_MAX >(lb.data); const int ls = loc->marshal(lb.data); if (ls > 0) { @@ -611,8 +612,6 @@ void
Node::setController(void *networkControllerInstance) m_ctx.localNetworkController->init(m_ctx.identity, this); } -// Methods used only within the core ---------------------------------------------------------------------------------- - bool Node::shouldUsePathForZeroTierTraffic(void *tPtr, const Identity &id, const int64_t localSocket, const InetAddress &remoteAddress) { { diff --git a/core/SharedPtr.hpp b/core/SharedPtr.hpp index b77a33dbb..3325621c8 100644 --- a/core/SharedPtr.hpp +++ b/core/SharedPtr.hpp @@ -116,20 +116,23 @@ public: * but with the caveat that it only works if there is only one remaining * SharedPtr to be treated as weak. * - * @return True if object was in fact deleted OR this pointer was already NULL + * This does not delete the object. It returns it as a naked pointer. + * + * @return Pointer to T if reference count was only one (this shared ptr is left NULL) */ - ZT_INLINE bool weakGC() + ZT_INLINE T *weakGC() { if (likely(m_ptr != nullptr)) { int one = 1; - if (const_cast< std::atomic< int > *>(&(m_ptr->__refCount))->compare_exchange_strong(one, (int)0, std::memory_order_acq_rel)) { - delete m_ptr; + if (const_cast< std::atomic< int > *>(&(m_ptr->__refCount))->compare_exchange_strong(one, (int)0)) { + T *const ptr = m_ptr; m_ptr = nullptr; - return true; + return ptr; + } else { + return nullptr; } - return false; } else { - return true; + return nullptr; } } @@ -141,7 +144,7 @@ public: ZT_INLINE int references() noexcept { if (likely(m_ptr != nullptr)) - return m_ptr->__refCount; + return m_ptr->__refCount.load(std::memory_order_relaxed); return 0; } diff --git a/core/Topology.cpp b/core/Topology.cpp index 7e99f50dd..28c0a09aa 100644 --- a/core/Topology.cpp +++ b/core/Topology.cpp @@ -61,6 +61,7 @@ void Topology::doPeriodicTasks(const CallContext &cc) for (Vector< SharedPtr< Peer > >::const_iterator r(m_roots.begin()); r != m_roots.end(); ++r) rootLookup.push_back((uintptr_t)r->ptr()); } + std::sort(rootLookup.begin(), rootLookup.end()); // Cleaning of peers and paths uses a two
pass method to avoid write locking // m_peers or m_paths for any significant amount of time. This avoids pauses @@ -71,7 +72,7 @@ void Topology::doPeriodicTasks(const CallContext &cc) RWMutex::RLock l1(m_peers_l); for (Map< Address, SharedPtr< Peer > >::iterator i(m_peers.begin()); i != m_peers.end(); ++i) { // TODO: also delete if the peer has not exchanged meaningful communication in a while, such as a network frame or non-trivial control packet. - if (((cc.ticks - i->second->lastReceive()) > ZT_PEER_ALIVE_TIMEOUT) && (std::find(rootLookup.begin(), rootLookup.end(), (uintptr_t)(i->second.ptr())) == rootLookup.end())) + if (((cc.ticks - i->second->lastReceive()) > ZT_PEER_ALIVE_TIMEOUT) && (!std::binary_search(rootLookup.begin(), rootLookup.end(), reinterpret_cast<uintptr_t>(i->second.ptr())))) toDelete.push_back(i->first); } } @@ -93,28 +94,28 @@ void Topology::doPeriodicTasks(const CallContext &cc) } } - // Delete paths that are no longer held by anyone else ("weak reference" type behavior). - // First pass: make a list of paths with a reference count of 1 meaning they are likely - // orphaned. Second pass: call weakGC() on each of these which does a hard compare/exchange - // and delete those that actually are GC'd. Write lock is aquired only briefly on delete - // just as with peers. { - Vector< Path::Key > possibleDelete; + Vector< Path * > toDelete; { - RWMutex::RLock l1(m_paths_l); - for (Map< Path::Key, SharedPtr< Path > >::iterator i(m_paths.begin()); i != m_paths.end(); ++i) { - if (i->second.references() <= 1) - possibleDelete.push_back(i->first); + RWMutex::Lock l1(m_paths_l); + for (Map< Path::Key, SharedPtr< Path > >::iterator i(m_paths.begin()); i != m_paths.end();) { + Path *const d = i->second.weakGC(); + if (likely(d == nullptr)) { + ++i; + } else { + m_paths.erase(i++); + try { + toDelete.push_back(d); + } catch (...)
{ + delete d; + } + } } } - if (!possibleDelete.empty()) { - ZT_SPEW("garbage collecting (likely) %u orphaned paths", (unsigned int)possibleDelete.size()); - for (Vector< Path::Key >::const_iterator i(possibleDelete.begin()); i != possibleDelete.end(); ++i) { - RWMutex::Lock l1(m_paths_l); - Map< Path::Key, SharedPtr< Path > >::iterator p(m_paths.find(*i)); - if ((p != m_paths.end()) && p->second.weakGC()) - m_paths.erase(p); - } + if (!toDelete.empty()) { + for (Vector< Path * >::iterator i(toDelete.begin()); i != toDelete.end(); ++i) + delete *i; + ZT_SPEW("garbage collected %u orphaned paths", (unsigned int)toDelete.size()); } } }