From adfbbc3fb00becc578afc0645c60b1de3d84bb4c Mon Sep 17 00:00:00 2001 From: Grant Limberg Date: Tue, 16 May 2023 11:56:58 -0700 Subject: [PATCH 1/3] Controller Metrics & Network Config Request Fix (#2003) * add new metrics for network config request queue size and sso expirations * move sso expiration to its own thread in the controller * fix potential undefined behavior when modifying a set --- controller/EmbeddedNetworkController.cpp | 75 ++++++++++++++---------- controller/EmbeddedNetworkController.hpp | 4 ++ node/Metrics.cpp | 9 +++ node/Metrics.hpp | 6 ++ osdep/BlockingQueue.hpp | 5 ++ 5 files changed, 68 insertions(+), 31 deletions(-) diff --git a/controller/EmbeddedNetworkController.cpp b/controller/EmbeddedNetworkController.cpp index 1d5cee014..914cad476 100644 --- a/controller/EmbeddedNetworkController.cpp +++ b/controller/EmbeddedNetworkController.cpp @@ -468,6 +468,8 @@ EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPa _path(dbPath), _sender((NetworkController::Sender *)0), _db(this), + _ssoExpiryRunning(true), + _ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this)), _rc(rc) { } @@ -476,8 +478,11 @@ EmbeddedNetworkController::~EmbeddedNetworkController() { std::lock_guard l(_threads_l); _queue.stop(); - for(auto t=_threads.begin();t!=_threads.end();++t) + for(auto t=_threads.begin();t!=_threads.end();++t) { t->join(); + } + _ssoExpiryRunning = false; + _ssoExpiry.join(); } void EmbeddedNetworkController::setSSORedirectURL(const std::string &url) { @@ -1543,7 +1548,7 @@ void EmbeddedNetworkController::_request( *(reinterpret_cast(&(r->target))) = t; if (v.ss_family == t.ss_family) *(reinterpret_cast(&(r->via))) = v; - ++nc->routeCount; + ++nc->routeCount; } } } @@ -1765,10 +1770,9 @@ void EmbeddedNetworkController::_startThreads() const long hwc = std::max((long)std::thread::hardware_concurrency(),(long)1); for(long t=0;t expired; - nlohmann::json network, member; for(;;) { _RQEntry *qe = (_RQEntry *)0; + Metrics::network_config_request_queue_size = _queue.size(); auto timedWaitResult = _queue.get(qe, 1000); if (timedWaitResult == BlockingQueue<_RQEntry *>::STOP) { break; @@ -1782,38 +1786,47 @@ void EmbeddedNetworkController::_startThreads() fprintf(stderr,"ERROR: exception in controller request handling thread: unknown exception" ZT_EOL_S); } delete qe; + qe = nullptr; } } - - expired.clear(); - int64_t now = OSUtils::now(); - { - std::lock_guard l(_expiringSoon_l); - for(auto s=_expiringSoon.begin();s!=_expiringSoon.end();) { - const int64_t when = s->first; - if (when <= now) { - // The user may have re-authorized, so we must actually look it up and check. - network.clear(); - member.clear(); - if (_db.get(s->second.networkId, network, s->second.nodeId, member)) { - int64_t authenticationExpiryTime = (int64_t)OSUtils::jsonInt(member["authenticationExpiryTime"], 0); - if (authenticationExpiryTime <= now) { - expired.push_back(s->second); - } - } - _expiringSoon.erase(s++); - } else { - // Don't bother going further into the future than necessary. - break; - } - } - } - for(auto e=expired.begin();e!=expired.end();++e) { - onNetworkMemberDeauthorize(nullptr, e->networkId, e->nodeId); - } } }); } } +void EmbeddedNetworkController::_ssoExpiryThread() { + while(_ssoExpiryRunning) { + std::vector<_MemberStatusKey> expired; + nlohmann::json network, member; + int64_t now = OSUtils::now(); + { + std::lock_guard l(_expiringSoon_l); + for(auto s=_expiringSoon.begin();s!=_expiringSoon.end();) { + Metrics::sso_expiration_checks++; + const int64_t when = s->first; + if (when <= now) { + // The user may have re-authorized, so we must actually look it up and check. + network.clear(); + member.clear(); + if (_db.get(s->second.networkId, network, s->second.nodeId, member)) { + int64_t authenticationExpiryTime = (int64_t)OSUtils::jsonInt(member["authenticationExpiryTime"], 0); + if (authenticationExpiryTime <= now) { + expired.push_back(s->second); + } + } + s = _expiringSoon.erase(s); + } else { + // Don't bother going further into the future than necessary. + break; + } + } + } + for(auto e=expired.begin();e!=expired.end();++e) { + Metrics::sso_member_deauth++; + onNetworkMemberDeauthorize(nullptr, e->networkId, e->nodeId); + } + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + } +} + } // namespace ZeroTier diff --git a/controller/EmbeddedNetworkController.hpp b/controller/EmbeddedNetworkController.hpp index 4f2e20e0a..97692fa4c 100644 --- a/controller/EmbeddedNetworkController.hpp +++ b/controller/EmbeddedNetworkController.hpp @@ -81,6 +81,7 @@ public: private: void _request(uint64_t nwid,const InetAddress &fromAddr,uint64_t requestPacketId,const Identity &identity,const Dictionary &metaData); void _startThreads(); + void _ssoExpiryThread(); std::string networkUpdateFromPostData(uint64_t networkID, const std::string &body); @@ -138,6 +139,9 @@ private: std::vector _threads; std::mutex _threads_l; + bool _ssoExpiryRunning; + std::thread _ssoExpiry; + std::unordered_map< _MemberStatusKey,_MemberStatus,_MemberStatusHash > _memberStatus; std::mutex _memberStatus_l; diff --git a/node/Metrics.cpp b/node/Metrics.cpp index 623454761..633c1b853 100644 --- a/node/Metrics.cpp +++ b/node/Metrics.cpp @@ -206,6 +206,15 @@ namespace ZeroTier { prometheus::simpleapi::counter_metric_t member_deauths {"controller_member_deauth_count", "number of network member deauths"}; + prometheus::simpleapi::gauge_metric_t network_config_request_queue_size + { "controller_network_config_request_queue", "number of entries in the request queue for network configurations" }; + + prometheus::simpleapi::counter_metric_t sso_expiration_checks + { "controller_sso_expiration_checks", "number of sso expiration checks done" }; + + prometheus::simpleapi::counter_metric_t sso_member_deauth + { "controller_sso_timeouts", "number of sso timeouts" }; + #ifdef ZT_CONTROLLER_USE_LIBPQ // Central Controller Metrics prometheus::simpleapi::counter_metric_t pgsql_mem_notification diff --git a/node/Metrics.hpp b/node/Metrics.hpp index 66b97c0d6..492a6f9ea 100644 --- a/node/Metrics.hpp +++ b/node/Metrics.hpp @@ -123,6 +123,10 @@ namespace ZeroTier { extern prometheus::simpleapi::counter_metric_t member_auths; extern prometheus::simpleapi::counter_metric_t member_deauths; + extern prometheus::simpleapi::gauge_metric_t network_config_request_queue_size; + extern prometheus::simpleapi::counter_metric_t sso_expiration_checks; + extern prometheus::simpleapi::counter_metric_t sso_member_deauth; + #ifdef ZT_CONTROLLER_USE_LIBPQ // Central Controller Metrics extern prometheus::simpleapi::counter_metric_t pgsql_mem_notification; @@ -132,6 +136,8 @@ namespace ZeroTier { extern prometheus::simpleapi::counter_metric_t redis_net_notification; extern prometheus::simpleapi::counter_metric_t redis_node_checkin; + + // Central DB Pool Metrics extern prometheus::simpleapi::counter_metric_t conn_counter; extern prometheus::simpleapi::counter_metric_t max_pool_size; diff --git a/osdep/BlockingQueue.hpp b/osdep/BlockingQueue.hpp index cce37a04a..f3caff991 100644 --- a/osdep/BlockingQueue.hpp +++ b/osdep/BlockingQueue.hpp @@ -116,6 +116,11 @@ public: return OK; } + inline size_t size() const { + std::unique_lock lock(m); + return q.size(); + } + private: std::queue q; mutable std::mutex m; From 9b7ff43118b3b0a219321aa984138d04fe6d3bb5 Mon Sep 17 00:00:00 2001 From: travis laduke Date: Wed, 17 May 2023 13:17:32 -0700 Subject: [PATCH 2/3] Enable RTTI in Windows build The new prometheus histogram stuff needs it. Access violation - no RTTI data!INVALID packet 636ebd9ee8cac6c0 from cafe9efeb9(2605:9880:200:1200:30:571:e34:51/9993) (unexpected exception in tryDecode()) --- windows/ZeroTierOne/ZeroTierOne.vcxproj | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/windows/ZeroTierOne/ZeroTierOne.vcxproj b/windows/ZeroTierOne/ZeroTierOne.vcxproj index 3722b03ad..aca2d49af 100644 --- a/windows/ZeroTierOne/ZeroTierOne.vcxproj +++ b/windows/ZeroTierOne/ZeroTierOne.vcxproj @@ -417,7 +417,7 @@ $(SolutionDir)\..\ext;$(SolutionDir)\..\ext\prometheus-cpp-lite-1.0\core\include;$(SolutionDir)\..\ext\prometheus-cpp-lite-1.0\simpleapi\include;$(SolutionDir)\..\zeroidc\target;%(AdditionalIncludeDirectories) ZT_SSO_ENABLED=1;ZT_EXPORT;FD_SETSIZE=1024;NOMINMAX;STATICLIB;WIN32;ZT_TRACE;ZT_USE_MINIUPNPC;MINIUPNP_STATICLIB;ZT_SOFTWARE_UPDATE_DEFAULT="disable";%(PreprocessorDefinitions) 4996 - false + true stdcpp17 true stdc11 @@ -439,7 +439,7 @@ $(SolutionDir)\..\ext;$(SolutionDir)\..\ext\prometheus-cpp-lite-1.0\core\include;$(SolutionDir)\..\ext\prometheus-cpp-lite-1.0\simpleapi\include;$(SolutionDir)\..\zeroidc\target;%(AdditionalIncludeDirectories) ZT_SSO_ENABLED=1;ZT_EXPORT;FD_SETSIZE=1024;NOMINMAX;STATICLIB;WIN32;ZT_TRACE;ZT_USE_MINIUPNPC;MINIUPNP_STATICLIB;ZT_SOFTWARE_UPDATE_DEFAULT="disable";%(PreprocessorDefinitions) 4996 - false + true stdcpp17 stdc11 true @@ -461,11 +461,11 @@ ZT_SSO_ENABLED=1;ZT_EXPORT;FD_SETSIZE=1024;NOMINMAX;STATICLIB;WIN32;ZT_TRACE;ZT_RULES_ENGINE_DEBUGGING;ZT_USE_MINIUPNPC;MINIUPNP_STATICLIB;ZT_SOFTWARE_UPDATE_DEFAULT="disable";%(PreprocessorDefinitions) true 4996 - false stdcpp17 stdc11 false false + true true @@ -507,11 +507,11 @@ ZT_SSO_ENABLED=1;ZT_EXPORT;FD_SETSIZE=1024;NOMINMAX;STATICLIB;WIN32;ZT_USE_MINIUPNPC;MINIUPNP_STATICLIB;ZT_SOFTWARE_UPDATE_DEFAULT="disable";%(PreprocessorDefinitions) true 4996 - false stdcpp17 stdc11 false false + true true @@ -558,7 +558,7 @@ true 4996 Guard - false + true stdcpp17 None false @@ -597,7 +597,6 @@ Guard false Cdecl - false stdcpp17 None false @@ -606,6 +605,7 @@ stdc11 false false + true false From e2dad367b4f751845b2c7c7be4a84e6370f4e312 Mon Sep 17 00:00:00 2001 From: travis laduke Date: Wed, 3 May 2023 14:22:40 -0700 Subject: [PATCH 3/3] Don't re-apply routes on BSD See issue #1986 --- osdep/ManagedRoute.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/osdep/ManagedRoute.cpp b/osdep/ManagedRoute.cpp index a8f996839..36d1f07a2 100644 --- a/osdep/ManagedRoute.cpp +++ b/osdep/ManagedRoute.cpp @@ -509,13 +509,13 @@ bool ManagedRoute::sync() } } - //if (!_applied.count(leftt)) { + if (leftt && !_applied.count(leftt)) { _applied[leftt] = !_via; //_routeCmd("delete",leftt,_via,(const char *)0,(_via) ? (const char *)0 : _device); _routeCmd("add",leftt,_via,(const char *)0,(_via) ? (const char *)0 : _device); //_routeCmd("change",leftt,_via,(const char *)0,(_via) ? (const char *)0 : _device); - //} - if (rightt) { + } + if (rightt && !_applied.count(rightt)) { _applied[rightt] = !_via; //_routeCmd("delete",rightt,_via,(const char *)0,(_via) ? (const char *)0 : _device); _routeCmd("add",rightt,_via,(const char *)0,(_via) ? (const char *)0 : _device);