Controller Metrics & Network Config Request Fix (#2003)

* add new metrics for network config request queue size and sso expirations
* move sso expiration to its own thread in the controller
* fix potential undefined behavior when modifying a set
This commit is contained in:
Grant Limberg 2023-05-16 11:56:58 -07:00 committed by GitHub
parent f621261ff9
commit adfbbc3fb0
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 68 additions and 31 deletions

View file

@ -468,6 +468,8 @@ EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPa
_path(dbPath),
_sender((NetworkController::Sender *)0),
_db(this),
_ssoExpiryRunning(true),
_ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this)),
_rc(rc)
{
}
@ -476,8 +478,11 @@ EmbeddedNetworkController::~EmbeddedNetworkController()
{
std::lock_guard<std::mutex> l(_threads_l);
_queue.stop();
for(auto t=_threads.begin();t!=_threads.end();++t)
for(auto t=_threads.begin();t!=_threads.end();++t) {
t->join();
}
_ssoExpiryRunning = false;
_ssoExpiry.join();
}
void EmbeddedNetworkController::setSSORedirectURL(const std::string &url) {
@ -1543,7 +1548,7 @@ void EmbeddedNetworkController::_request(
*(reinterpret_cast<InetAddress *>(&(r->target))) = t;
if (v.ss_family == t.ss_family)
*(reinterpret_cast<InetAddress *>(&(r->via))) = v;
++nc->routeCount;
++nc->routeCount;
}
}
}
@ -1765,10 +1770,9 @@ void EmbeddedNetworkController::_startThreads()
const long hwc = std::max((long)std::thread::hardware_concurrency(),(long)1);
for(long t=0;t<hwc;++t) {
_threads.emplace_back([this]() {
std::vector<_MemberStatusKey> expired;
nlohmann::json network, member;
for(;;) {
_RQEntry *qe = (_RQEntry *)0;
Metrics::network_config_request_queue_size = _queue.size();
auto timedWaitResult = _queue.get(qe, 1000);
if (timedWaitResult == BlockingQueue<_RQEntry *>::STOP) {
break;
@ -1782,38 +1786,47 @@ void EmbeddedNetworkController::_startThreads()
fprintf(stderr,"ERROR: exception in controller request handling thread: unknown exception" ZT_EOL_S);
}
delete qe;
qe = nullptr;
}
}
expired.clear();
int64_t now = OSUtils::now();
{
std::lock_guard<std::mutex> l(_expiringSoon_l);
for(auto s=_expiringSoon.begin();s!=_expiringSoon.end();) {
const int64_t when = s->first;
if (when <= now) {
// The user may have re-authorized, so we must actually look it up and check.
network.clear();
member.clear();
if (_db.get(s->second.networkId, network, s->second.nodeId, member)) {
int64_t authenticationExpiryTime = (int64_t)OSUtils::jsonInt(member["authenticationExpiryTime"], 0);
if (authenticationExpiryTime <= now) {
expired.push_back(s->second);
}
}
_expiringSoon.erase(s++);
} else {
// Don't bother going further into the future than necessary.
break;
}
}
}
for(auto e=expired.begin();e!=expired.end();++e) {
onNetworkMemberDeauthorize(nullptr, e->networkId, e->nodeId);
}
}
});
}
}
void EmbeddedNetworkController::_ssoExpiryThread() {
while(_ssoExpiryRunning) {
std::vector<_MemberStatusKey> expired;
nlohmann::json network, member;
int64_t now = OSUtils::now();
{
std::lock_guard<std::mutex> l(_expiringSoon_l);
for(auto s=_expiringSoon.begin();s!=_expiringSoon.end();) {
Metrics::sso_expiration_checks++;
const int64_t when = s->first;
if (when <= now) {
// The user may have re-authorized, so we must actually look it up and check.
network.clear();
member.clear();
if (_db.get(s->second.networkId, network, s->second.nodeId, member)) {
int64_t authenticationExpiryTime = (int64_t)OSUtils::jsonInt(member["authenticationExpiryTime"], 0);
if (authenticationExpiryTime <= now) {
expired.push_back(s->second);
}
}
s = _expiringSoon.erase(s);
} else {
// Don't bother going further into the future than necessary.
break;
}
}
}
for(auto e=expired.begin();e!=expired.end();++e) {
Metrics::sso_member_deauth++;
onNetworkMemberDeauthorize(nullptr, e->networkId, e->nodeId);
}
std::this_thread::sleep_for(std::chrono::milliseconds(500));
}
}
} // namespace ZeroTier

View file

@ -81,6 +81,7 @@ public:
private:
void _request(uint64_t nwid,const InetAddress &fromAddr,uint64_t requestPacketId,const Identity &identity,const Dictionary<ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY> &metaData);
void _startThreads();
void _ssoExpiryThread();
std::string networkUpdateFromPostData(uint64_t networkID, const std::string &body);
@ -138,6 +139,9 @@ private:
std::vector<std::thread> _threads;
std::mutex _threads_l;
// Loop condition for _ssoExpiryThread(): initialized to true in the constructor and
// cleared by the destructor just before it joins _ssoExpiry.
// NOTE(review): this is a plain bool written by the destroying thread and read by the
// expiry thread with no synchronization visible here -- consider std::atomic<bool>.
bool _ssoExpiryRunning;
// Thread running _ssoExpiryThread(); started from the constructor's initializer list.
// NOTE(review): members declared after this one are not yet constructed when the thread
// starts -- confirm that everything _ssoExpiryThread() touches is declared earlier.
std::thread _ssoExpiry;
std::unordered_map< _MemberStatusKey,_MemberStatus,_MemberStatusHash > _memberStatus;
std::mutex _memberStatus_l;

View file

@ -206,6 +206,15 @@ namespace ZeroTier {
prometheus::simpleapi::counter_metric_t member_deauths
{"controller_member_deauth_count", "number of network member deauths"};
// Gauge: assigned from _queue.size() by the controller's request worker loop before each queue wait.
prometheus::simpleapi::gauge_metric_t network_config_request_queue_size
{ "controller_network_config_request_queue", "number of entries in the request queue for network configurations" };
// Counter: incremented once per _expiringSoon entry examined by EmbeddedNetworkController::_ssoExpiryThread().
prometheus::simpleapi::counter_metric_t sso_expiration_checks
{ "controller_sso_expiration_checks", "number of sso expiration checks done" };
// Counter: incremented once per member deauthorized by _ssoExpiryThread().
// Note the exported metric name is "controller_sso_timeouts", not the variable name.
prometheus::simpleapi::counter_metric_t sso_member_deauth
{ "controller_sso_timeouts", "number of sso timeouts" };
#ifdef ZT_CONTROLLER_USE_LIBPQ
// Central Controller Metrics
prometheus::simpleapi::counter_metric_t pgsql_mem_notification

View file

@ -123,6 +123,10 @@ namespace ZeroTier {
extern prometheus::simpleapi::counter_metric_t member_auths;
extern prometheus::simpleapi::counter_metric_t member_deauths;
// Network config request queue depth and SSO expiration metrics
extern prometheus::simpleapi::gauge_metric_t network_config_request_queue_size;
extern prometheus::simpleapi::counter_metric_t sso_expiration_checks;
extern prometheus::simpleapi::counter_metric_t sso_member_deauth;
#ifdef ZT_CONTROLLER_USE_LIBPQ
// Central Controller Metrics
extern prometheus::simpleapi::counter_metric_t pgsql_mem_notification;
@ -132,6 +136,8 @@ namespace ZeroTier {
extern prometheus::simpleapi::counter_metric_t redis_net_notification;
extern prometheus::simpleapi::counter_metric_t redis_node_checkin;
// Central DB Pool Metrics
extern prometheus::simpleapi::counter_metric_t conn_counter;
extern prometheus::simpleapi::counter_metric_t max_pool_size;

View file

@ -116,6 +116,11 @@ public:
return OK;
}
/**
 * @return Number of entries currently in the underlying queue, read while holding the queue mutex
 */
inline size_t size() const
{
	std::lock_guard<std::mutex> guard(m);
	return q.size();
}
private:
std::queue<T> q;
mutable std::mutex m;