Fix a bug in Topology path GC and make it more efficient.

This commit is contained in:
Adam Ierymenko 2021-04-07 19:31:53 -04:00
parent e26e77dd6d
commit de6fadc12d
No known key found for this signature in database
GPG key ID: C8877CF2D7A5D7F3
3 changed files with 44 additions and 41 deletions

View file

@@ -211,15 +211,16 @@ ZT_ResultCode Node::processBackgroundTasks(
try { try {
Vector< SharedPtr< Peer > > allPeers, rootPeers; Vector< SharedPtr< Peer > > allPeers, rootPeers;
m_ctx.topology->allPeers(allPeers, rootPeers); m_ctx.topology->allPeers(allPeers, rootPeers);
std::sort(rootPeers.begin(), rootPeers.end());
bool online = false; bool online = false;
for (Vector< SharedPtr< Peer > >::iterator p(allPeers.begin()); p != allPeers.end(); ++p) { for (Vector< SharedPtr< Peer > >::iterator p(allPeers.begin()); p != allPeers.end(); ++p) {
const bool isRoot = std::find(rootPeers.begin(), rootPeers.end(), *p) != rootPeers.end(); const bool isRoot = std::binary_search(rootPeers.begin(), rootPeers.end(), *p);
(*p)->pulse(m_ctx, cc, isRoot); (*p)->pulse(m_ctx, cc, isRoot);
online |= ((isRoot || rootPeers.empty()) && (*p)->directlyConnected(cc)); online |= ((isRoot || rootPeers.empty()) && (*p)->directlyConnected(cc));
} }
if (m_online.exchange(online) != online) if (m_online.exchange(online, std::memory_order_relaxed) != online)
postEvent(cc.tPtr, online ? ZT_EVENT_ONLINE : ZT_EVENT_OFFLINE); postEvent(cc.tPtr, online ? ZT_EVENT_ONLINE : ZT_EVENT_OFFLINE);
ZT_SPEW("ranking roots..."); ZT_SPEW("ranking roots...");
@@ -346,9 +347,9 @@ struct p_ZT_PeerListPrivate : public ZT_PeerList
{ {
// Actual containers for the memory, hidden from external users. // Actual containers for the memory, hidden from external users.
Vector< ZT_Peer > p_peers; Vector< ZT_Peer > p_peers;
List< Vector< ZT_Path > > p_paths; ForwardList< Vector< ZT_Path > > p_paths;
List< Identity > p_identities; ForwardList< Identity > p_identities;
List< Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > > p_locators; ForwardList< Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > > p_locators;
}; };
static void p_peerListFreeFunction(const void *pl) static void p_peerListFreeFunction(const void *pl)
@@ -381,9 +382,9 @@ ZT_PeerList *Node::peers(const CallContext &cc) const
Peer &pp = **pi; Peer &pp = **pi;
p.address = pp.address(); p.address = pp.address();
pl->p_identities.push_back(pp.identity()); pl->p_identities.push_front(pp.identity());
p.identity = reinterpret_cast<const ZT_Identity *>(&(pl->p_identities.back())); p.identity = reinterpret_cast<const ZT_Identity *>(&(pl->p_identities.front()));
p.fingerprint = &(pl->p_identities.back().fingerprint()); p.fingerprint = &(pl->p_identities.front().fingerprint());
if (pp.remoteVersionKnown()) { if (pp.remoteVersionKnown()) {
p.versionMajor = (int)pp.remoteVersionMajor(); p.versionMajor = (int)pp.remoteVersionMajor();
p.versionMinor = (int)pp.remoteVersionMinor(); p.versionMinor = (int)pp.remoteVersionMinor();
@@ -404,8 +405,8 @@ ZT_PeerList *Node::peers(const CallContext &cc) const
Vector< SharedPtr< Path > > ztPaths; Vector< SharedPtr< Path > > ztPaths;
pp.getAllPaths(ztPaths); pp.getAllPaths(ztPaths);
if (ztPaths.empty()) { if (ztPaths.empty()) {
pl->p_paths.push_back(Vector< ZT_Path >()); pl->p_paths.push_front(Vector< ZT_Path >());
std::vector< ZT_Path > &apiPaths = pl->p_paths.back(); std::vector< ZT_Path > &apiPaths = pl->p_paths.front();
apiPaths.resize(ztPaths.size()); apiPaths.resize(ztPaths.size());
for (unsigned long i = 0; i < (unsigned long)ztPaths.size(); ++i) { for (unsigned long i = 0; i < (unsigned long)ztPaths.size(); ++i) {
SharedPtr< Path > &ztp = ztPaths[i]; SharedPtr< Path > &ztp = ztPaths[i];
@@ -426,8 +427,8 @@ ZT_PeerList *Node::peers(const CallContext &cc) const
const SharedPtr< const Locator > loc(pp.locator()); const SharedPtr< const Locator > loc(pp.locator());
if (loc) { if (loc) {
pl->p_locators.push_back(Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX >()); pl->p_locators.push_front(Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX >());
Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > &lb = pl->p_locators.back(); Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > &lb = pl->p_locators.front();
Utils::zero< ZT_LOCATOR_MARSHAL_SIZE_MAX >(lb.data); Utils::zero< ZT_LOCATOR_MARSHAL_SIZE_MAX >(lb.data);
const int ls = loc->marshal(lb.data); const int ls = loc->marshal(lb.data);
if (ls > 0) { if (ls > 0) {
@@ -611,8 +612,6 @@ void Node::setController(void *networkControllerInstance)
m_ctx.localNetworkController->init(m_ctx.identity, this); m_ctx.localNetworkController->init(m_ctx.identity, this);
} }
// Methods used only within the core ----------------------------------------------------------------------------------
bool Node::shouldUsePathForZeroTierTraffic(void *tPtr, const Identity &id, const int64_t localSocket, const InetAddress &remoteAddress) bool Node::shouldUsePathForZeroTierTraffic(void *tPtr, const Identity &id, const int64_t localSocket, const InetAddress &remoteAddress)
{ {
{ {

View file

@@ -116,20 +116,23 @@ public:
* but with the caveat that it only works if there is only one remaining * but with the caveat that it only works if there is only one remaining
* SharedPtr to be treated as weak. * SharedPtr to be treated as weak.
* *
* @return True if object was in fact deleted OR this pointer was already NULL * This does not delete the object. It returns it as a naked pointer.
*
* @return Pointer to T if reference count was only one (this shared ptr is left NULL)
*/ */
ZT_INLINE bool weakGC() ZT_INLINE T *weakGC()
{ {
if (likely(m_ptr != nullptr)) { if (likely(m_ptr != nullptr)) {
int one = 1; int one = 1;
if (const_cast<std::atomic< int > *>(&(m_ptr->__refCount))->compare_exchange_strong(one, (int)0, std::memory_order_acq_rel)) { if (const_cast<std::atomic< int > *>(&(m_ptr->__refCount))->compare_exchange_strong(one, (int)0)) {
delete m_ptr; T *const ptr = m_ptr;
m_ptr = nullptr; m_ptr = nullptr;
return true; return ptr;
} else {
return nullptr;
} }
return false;
} else { } else {
return true; return nullptr;
} }
} }
@@ -141,7 +144,7 @@ public:
ZT_INLINE int references() noexcept ZT_INLINE int references() noexcept
{ {
if (likely(m_ptr != nullptr)) if (likely(m_ptr != nullptr))
return m_ptr->__refCount; return m_ptr->__refCount.load(std::memory_order_relaxed);
return 0; return 0;
} }

View file

@@ -61,6 +61,7 @@ void Topology::doPeriodicTasks(const CallContext &cc)
for (Vector< SharedPtr< Peer > >::const_iterator r(m_roots.begin()); r != m_roots.end(); ++r) for (Vector< SharedPtr< Peer > >::const_iterator r(m_roots.begin()); r != m_roots.end(); ++r)
rootLookup.push_back((uintptr_t)r->ptr()); rootLookup.push_back((uintptr_t)r->ptr());
} }
std::sort(rootLookup.begin(), rootLookup.end());
// Cleaning of peers and paths uses a two pass method to avoid write locking // Cleaning of peers and paths uses a two pass method to avoid write locking
// m_peers or m_paths for any significant amount of time. This avoids pauses // m_peers or m_paths for any significant amount of time. This avoids pauses
@@ -71,7 +72,7 @@ void Topology::doPeriodicTasks(const CallContext &cc)
RWMutex::RLock l1(m_peers_l); RWMutex::RLock l1(m_peers_l);
for (Map< Address, SharedPtr< Peer > >::iterator i(m_peers.begin()); i != m_peers.end(); ++i) { for (Map< Address, SharedPtr< Peer > >::iterator i(m_peers.begin()); i != m_peers.end(); ++i) {
// TODO: also delete if the peer has not exchanged meaningful communication in a while, such as a network frame or non-trivial control packet. // TODO: also delete if the peer has not exchanged meaningful communication in a while, such as a network frame or non-trivial control packet.
if (((cc.ticks - i->second->lastReceive()) > ZT_PEER_ALIVE_TIMEOUT) && (std::find(rootLookup.begin(), rootLookup.end(), (uintptr_t)(i->second.ptr())) == rootLookup.end())) if (((cc.ticks - i->second->lastReceive()) > ZT_PEER_ALIVE_TIMEOUT) && (!std::binary_search(rootLookup.begin(), rootLookup.end(), reinterpret_cast<uintptr_t>(i->second.ptr()))))
toDelete.push_back(i->first); toDelete.push_back(i->first);
} }
} }
@@ -93,28 +94,28 @@ void Topology::doPeriodicTasks(const CallContext &cc)
} }
} }
// Delete paths that are no longer held by anyone else ("weak reference" type behavior).
// First pass: make a list of paths with a reference count of 1 meaning they are likely
// orphaned. Second pass: call weakGC() on each of these which does a hard compare/exchange
// and delete those that actually are GC'd. Write lock is acquired only briefly on delete
// just as with peers.
{ {
Vector< Path::Key > possibleDelete; Vector< Path * > toDelete;
{ {
RWMutex::RLock l1(m_paths_l); RWMutex::Lock l1(m_paths_l);
for (Map< Path::Key, SharedPtr< Path > >::iterator i(m_paths.begin()); i != m_paths.end(); ++i) { for (Map< Path::Key, SharedPtr< Path > >::iterator i(m_paths.begin()); i != m_paths.end();) {
if (i->second.references() <= 1) Path *const d = i->second.weakGC();
possibleDelete.push_back(i->first); if (likely(d == nullptr)) {
++i;
} else {
m_paths.erase(i++);
try {
toDelete.push_back(d);
} catch (...) {
delete d;
}
}
} }
} }
if (!possibleDelete.empty()) { if (!toDelete.empty()) {
ZT_SPEW("garbage collecting (likely) %u orphaned paths", (unsigned int)possibleDelete.size()); for (Vector< Path * >::iterator i(toDelete.begin()); i != toDelete.end(); ++i)
for (Vector< Path::Key >::const_iterator i(possibleDelete.begin()); i != possibleDelete.end(); ++i) { delete *i;
RWMutex::Lock l1(m_paths_l); ZT_SPEW("garbage collected %u orphaned paths", (unsigned int)toDelete.size());
Map< Path::Key, SharedPtr< Path > >::iterator p(m_paths.find(*i));
if ((p != m_paths.end()) && p->second.weakGC())
m_paths.erase(p);
}
} }
} }
} }