Fix a bug in Topology path GC and make it more efficient.

This commit is contained in:
Adam Ierymenko 2021-04-07 19:31:53 -04:00
parent e26e77dd6d
commit de6fadc12d
No known key found for this signature in database
GPG key ID: C8877CF2D7A5D7F3
3 changed files with 44 additions and 41 deletions

View file

@@ -211,15 +211,16 @@ ZT_ResultCode Node::processBackgroundTasks(
try {
Vector< SharedPtr< Peer > > allPeers, rootPeers;
m_ctx.topology->allPeers(allPeers, rootPeers);
std::sort(rootPeers.begin(), rootPeers.end());
bool online = false;
for (Vector< SharedPtr< Peer > >::iterator p(allPeers.begin()); p != allPeers.end(); ++p) {
const bool isRoot = std::find(rootPeers.begin(), rootPeers.end(), *p) != rootPeers.end();
const bool isRoot = std::binary_search(rootPeers.begin(), rootPeers.end(), *p);
(*p)->pulse(m_ctx, cc, isRoot);
online |= ((isRoot || rootPeers.empty()) && (*p)->directlyConnected(cc));
}
if (m_online.exchange(online) != online)
if (m_online.exchange(online, std::memory_order_relaxed) != online)
postEvent(cc.tPtr, online ? ZT_EVENT_ONLINE : ZT_EVENT_OFFLINE);
ZT_SPEW("ranking roots...");
@@ -346,9 +347,9 @@ struct p_ZT_PeerListPrivate : public ZT_PeerList
{
// Actual containers for the memory, hidden from external users.
Vector< ZT_Peer > p_peers;
List< Vector< ZT_Path > > p_paths;
List< Identity > p_identities;
List< Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > > p_locators;
ForwardList< Vector< ZT_Path > > p_paths;
ForwardList< Identity > p_identities;
ForwardList< Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > > p_locators;
};
static void p_peerListFreeFunction(const void *pl)
@@ -381,9 +382,9 @@ ZT_PeerList *Node::peers(const CallContext &cc) const
Peer &pp = **pi;
p.address = pp.address();
pl->p_identities.push_back(pp.identity());
p.identity = reinterpret_cast<const ZT_Identity *>(&(pl->p_identities.back()));
p.fingerprint = &(pl->p_identities.back().fingerprint());
pl->p_identities.push_front(pp.identity());
p.identity = reinterpret_cast<const ZT_Identity *>(&(pl->p_identities.front()));
p.fingerprint = &(pl->p_identities.front().fingerprint());
if (pp.remoteVersionKnown()) {
p.versionMajor = (int)pp.remoteVersionMajor();
p.versionMinor = (int)pp.remoteVersionMinor();
@@ -404,8 +405,8 @@ ZT_PeerList *Node::peers(const CallContext &cc) const
Vector< SharedPtr< Path > > ztPaths;
pp.getAllPaths(ztPaths);
if (ztPaths.empty()) {
pl->p_paths.push_back(Vector< ZT_Path >());
std::vector< ZT_Path > &apiPaths = pl->p_paths.back();
pl->p_paths.push_front(Vector< ZT_Path >());
std::vector< ZT_Path > &apiPaths = pl->p_paths.front();
apiPaths.resize(ztPaths.size());
for (unsigned long i = 0; i < (unsigned long)ztPaths.size(); ++i) {
SharedPtr< Path > &ztp = ztPaths[i];
@@ -426,8 +427,8 @@ ZT_PeerList *Node::peers(const CallContext &cc) const
const SharedPtr< const Locator > loc(pp.locator());
if (loc) {
pl->p_locators.push_back(Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX >());
Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > &lb = pl->p_locators.back();
pl->p_locators.push_front(Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX >());
Blob< ZT_LOCATOR_MARSHAL_SIZE_MAX > &lb = pl->p_locators.front();
Utils::zero< ZT_LOCATOR_MARSHAL_SIZE_MAX >(lb.data);
const int ls = loc->marshal(lb.data);
if (ls > 0) {
@@ -611,8 +612,6 @@ void Node::setController(void *networkControllerInstance)
m_ctx.localNetworkController->init(m_ctx.identity, this);
}
// Methods used only within the core ----------------------------------------------------------------------------------
bool Node::shouldUsePathForZeroTierTraffic(void *tPtr, const Identity &id, const int64_t localSocket, const InetAddress &remoteAddress)
{
{

View file

@@ -116,20 +116,23 @@ public:
* but with the caveat that it only works if there is only one remaining
* SharedPtr to be treated as weak.
*
* @return True if object was in fact deleted OR this pointer was already NULL
* This does not delete the object. It returns it as a naked pointer.
*
* @return Pointer to T if reference count was only one (this shared ptr is left NULL)
*/
ZT_INLINE bool weakGC()
ZT_INLINE T *weakGC()
{
if (likely(m_ptr != nullptr)) {
int one = 1;
if (const_cast<std::atomic< int > *>(&(m_ptr->__refCount))->compare_exchange_strong(one, (int)0, std::memory_order_acq_rel)) {
delete m_ptr;
if (const_cast<std::atomic< int > *>(&(m_ptr->__refCount))->compare_exchange_strong(one, (int)0)) {
T *const ptr = m_ptr;
m_ptr = nullptr;
return true;
return ptr;
} else {
return nullptr;
}
return false;
} else {
return true;
return nullptr;
}
}
@@ -141,7 +144,7 @@ public:
ZT_INLINE int references() noexcept
{
if (likely(m_ptr != nullptr))
return m_ptr->__refCount;
return m_ptr->__refCount.load(std::memory_order_relaxed);
return 0;
}

View file

@@ -61,6 +61,7 @@ void Topology::doPeriodicTasks(const CallContext &cc)
for (Vector< SharedPtr< Peer > >::const_iterator r(m_roots.begin()); r != m_roots.end(); ++r)
rootLookup.push_back((uintptr_t)r->ptr());
}
std::sort(rootLookup.begin(), rootLookup.end());
// Cleaning of peers and paths uses a two pass method to avoid write locking
// m_peers or m_paths for any significant amount of time. This avoids pauses
@@ -71,7 +72,7 @@ void Topology::doPeriodicTasks(const CallContext &cc)
RWMutex::RLock l1(m_peers_l);
for (Map< Address, SharedPtr< Peer > >::iterator i(m_peers.begin()); i != m_peers.end(); ++i) {
// TODO: also delete if the peer has not exchanged meaningful communication in a while, such as a network frame or non-trivial control packet.
if (((cc.ticks - i->second->lastReceive()) > ZT_PEER_ALIVE_TIMEOUT) && (std::find(rootLookup.begin(), rootLookup.end(), (uintptr_t)(i->second.ptr())) == rootLookup.end()))
if (((cc.ticks - i->second->lastReceive()) > ZT_PEER_ALIVE_TIMEOUT) && (!std::binary_search(rootLookup.begin(), rootLookup.end(), reinterpret_cast<uintptr_t>(i->second.ptr()))))
toDelete.push_back(i->first);
}
}
@@ -93,28 +94,28 @@ void Topology::doPeriodicTasks(const CallContext &cc)
}
}
// Delete paths that are no longer held by anyone else ("weak reference" type behavior).
// First pass: make a list of paths with a reference count of 1 meaning they are likely
// orphaned. Second pass: call weakGC() on each of these which does a hard compare/exchange
// and delete those that actually are GC'd. Write lock is aquired only briefly on delete
// just as with peers.
{
Vector< Path::Key > possibleDelete;
Vector< Path * > toDelete;
{
RWMutex::RLock l1(m_paths_l);
for (Map< Path::Key, SharedPtr< Path > >::iterator i(m_paths.begin()); i != m_paths.end(); ++i) {
if (i->second.references() <= 1)
possibleDelete.push_back(i->first);
RWMutex::Lock l1(m_paths_l);
for (Map< Path::Key, SharedPtr< Path > >::iterator i(m_paths.begin()); i != m_paths.end();) {
Path *const d = i->second.weakGC();
if (likely(d == nullptr)) {
++i;
} else {
m_paths.erase(i++);
try {
toDelete.push_back(d);
} catch (...) {
delete d;
}
}
}
}
if (!possibleDelete.empty()) {
ZT_SPEW("garbage collecting (likely) %u orphaned paths", (unsigned int)possibleDelete.size());
for (Vector< Path::Key >::const_iterator i(possibleDelete.begin()); i != possibleDelete.end(); ++i) {
RWMutex::Lock l1(m_paths_l);
Map< Path::Key, SharedPtr< Path > >::iterator p(m_paths.find(*i));
if ((p != m_paths.end()) && p->second.weakGC())
m_paths.erase(p);
}
if (!toDelete.empty()) {
for (Vector< Path * >::iterator i(toDelete.begin()); i != toDelete.end(); ++i)
delete *i;
ZT_SPEW("garbage collected %u orphaned paths", (unsigned int)toDelete.size());
}
}
}