mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-04-13 18:56:54 +02:00
Metrics consolidation (#1997)
* Rename zt_packet_incoming -> zt_packet. Also consolidate zt_peer_packets into a single metric with tx and rx labels. Same for ztc_tcp_data and ztc_udp_data.
* Further collapse tcp & udp into metric labels for zt_data
* Fix zt_data metric description
* zt_peer_packets description fix
* Consolidate incoming/outgoing network packets to a single metric
* zt_incoming_packet_error -> zt_packet_error
* Disable peer metrics for central controllers. Can change in the future if needed, but given the traffic our controllers serve, that's going to be a *lot* of data.
* Disable peer metrics for controllers pt 2
This commit is contained in:
parent
74dc41c7c7
commit
00d55fc4b4
7 changed files with 75 additions and 60 deletions
|
@ -311,7 +311,7 @@ endif
|
|||
ifeq ($(ZT_CONTROLLER),1)
|
||||
override CXXFLAGS+=-Wall -Wno-deprecated -std=c++17 -pthread $(INCLUDES) -DNDEBUG $(DEFS)
|
||||
override LDLIBS+=-Lext/libpqxx-7.7.3/install/ubuntu22.04/lib -lpqxx -lpq ext/hiredis-1.0.2/lib/ubuntu22.04/libhiredis.a ext/redis-plus-plus-1.3.3/install/ubuntu22.04/lib/libredis++.a -lssl -lcrypto
|
||||
override DEFS+=-DZT_CONTROLLER_USE_LIBPQ
|
||||
override DEFS+=-DZT_CONTROLLER_USE_LIBPQ -DZT_NO_PEER_METRICS
|
||||
override INCLUDES+=-I/usr/include/postgresql -Iext/libpqxx-7.7.3/install/ubuntu22.04/include -Iext/hiredis-1.0.2/include/ -Iext/redis-plus-plus-1.3.3/install/ubuntu22.04/include/sw/
|
||||
endif
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@ namespace ZeroTier {
|
|||
namespace Metrics {
|
||||
// Packet Type Counts
|
||||
prometheus::simpleapi::counter_family_t packets
|
||||
{ "zt_packet_incoming", "incoming packet type counts"};
|
||||
{ "zt_packet", "incoming packet type counts"};
|
||||
|
||||
// Incoming packets
|
||||
prometheus::simpleapi::counter_metric_t pkt_nop_in
|
||||
|
@ -118,7 +118,7 @@ namespace ZeroTier {
|
|||
|
||||
// Packet Error Counts
|
||||
prometheus::simpleapi::counter_family_t packet_errors
|
||||
{ "zt_packet_incoming_error", "incoming packet errors"};
|
||||
{ "zt_packet_error", "incoming packet errors"};
|
||||
|
||||
// Incoming Error Counts
|
||||
prometheus::simpleapi::counter_metric_t pkt_error_obj_not_found_in
|
||||
|
@ -157,25 +157,26 @@ namespace ZeroTier {
|
|||
{ packet_errors.Add({{"error_type", "internal_server_error"}, {"direction", "tx"}}) };
|
||||
|
||||
// Data Sent/Received Metrics
|
||||
prometheus::simpleapi::counter_metric_t udp_send
|
||||
{ "zt_udp_data_sent", "number of bytes ZeroTier has sent via UDP" };
|
||||
prometheus::simpleapi::counter_family_t data
|
||||
{ "zt_data", "number of bytes ZeroTier has transmitted or received" };
|
||||
prometheus::simpleapi::counter_metric_t udp_recv
|
||||
{ "zt_udp_data_recv", "number of bytes ZeroTier has received via UDP" };
|
||||
{ data.Add({{"protocol","udp"},{"direction","rx"}}) };
|
||||
prometheus::simpleapi::counter_metric_t udp_send
|
||||
{ data.Add({{"protocol","udp"},{"direction","tx"}}) };
|
||||
prometheus::simpleapi::counter_metric_t tcp_send
|
||||
{ "zt_tcp_data_sent", "number of bytes ZeroTier has sent via TCP" };
|
||||
{ data.Add({{"protocol","tcp"},{"direction", "tx"}}) };
|
||||
prometheus::simpleapi::counter_metric_t tcp_recv
|
||||
{ "zt_tcp_data_recv", "number of bytes ZeroTier has received via TCP" };
|
||||
{ data.Add({{"protocol","tcp"},{"direction", "rx"}}) };
|
||||
|
||||
// Network Metrics
|
||||
prometheus::simpleapi::gauge_metric_t network_num_joined
|
||||
{ "zt_num_networks", "number of networks this instance is joined to" };
|
||||
prometheus::simpleapi::gauge_family_t network_num_multicast_groups
|
||||
{ "zt_network_multcast_groups_subscribed", "number of multicast groups networks are subscribed to" };
|
||||
prometheus::simpleapi::counter_family_t network_incoming_packets
|
||||
{ "zt_network_incoming_packets", "number of incoming packets per network" };
|
||||
prometheus::simpleapi::counter_family_t network_outgoing_packets
|
||||
{ "zt_network_outgoing_packets", "number of outgoing packets per network" };
|
||||
|
||||
{ "zt_network_multicast_groups_subscribed", "number of multicast groups networks are subscribed to" };
|
||||
prometheus::simpleapi::counter_family_t network_packets
|
||||
{ "zt_network_packets", "number of incoming/outgoing packets per network" };
|
||||
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
// PeerMetrics
|
||||
prometheus::CustomFamily<prometheus::Histogram<uint64_t>> &peer_latency =
|
||||
prometheus::Builder<prometheus::Histogram<uint64_t>>()
|
||||
|
@ -185,12 +186,11 @@ namespace ZeroTier {
|
|||
|
||||
prometheus::simpleapi::gauge_family_t peer_path_count
|
||||
{ "zt_peer_path_count", "number of paths to peer" };
|
||||
prometheus::simpleapi::counter_family_t peer_incoming_packets
|
||||
{ "zt_peer_incoming_packets", "number of incoming packets from a peer" };
|
||||
prometheus::simpleapi::counter_family_t peer_outgoing_packets
|
||||
{ "zt_peer_outgoing_packets", "number of outgoing packets to a peer" };
|
||||
prometheus::simpleapi::counter_family_t peer_packets
|
||||
{ "zt_peer_packets", "number of packets to/from a peer" };
|
||||
prometheus::simpleapi::counter_family_t peer_packet_errors
|
||||
{ "zt_peer_packet_errors" , "number of incoming packet errors from a peer" };
|
||||
#endif
|
||||
|
||||
// General Controller Metrics
|
||||
prometheus::simpleapi::gauge_metric_t network_count
|
||||
|
|
|
@ -96,23 +96,24 @@ namespace ZeroTier {
|
|||
extern prometheus::simpleapi::counter_metric_t pkt_error_internal_server_error_out;
|
||||
|
||||
// Data Sent/Received Metrics
|
||||
extern prometheus::simpleapi::counter_family_t data;
|
||||
extern prometheus::simpleapi::counter_metric_t udp_send;
|
||||
extern prometheus::simpleapi::counter_metric_t udp_recv;
|
||||
extern prometheus::simpleapi::counter_metric_t tcp_send;
|
||||
extern prometheus::simpleapi::counter_metric_t tcp_recv;
|
||||
|
||||
// Network Metrics
|
||||
extern prometheus::simpleapi::gauge_metric_t network_num_joined;
|
||||
extern prometheus::simpleapi::gauge_family_t network_num_multicast_groups;
|
||||
extern prometheus::simpleapi::counter_family_t network_incoming_packets;
|
||||
extern prometheus::simpleapi::counter_family_t network_outgoing_packets;
|
||||
extern prometheus::simpleapi::gauge_metric_t network_num_joined;
|
||||
extern prometheus::simpleapi::gauge_family_t network_num_multicast_groups;
|
||||
extern prometheus::simpleapi::counter_family_t network_packets;
|
||||
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
// Peer Metrics
|
||||
extern prometheus::CustomFamily<prometheus::Histogram<uint64_t>> &peer_latency;
|
||||
extern prometheus::simpleapi::gauge_family_t peer_path_count;
|
||||
extern prometheus::simpleapi::counter_family_t peer_incoming_packets;
|
||||
extern prometheus::simpleapi::counter_family_t peer_outgoing_packets;
|
||||
extern prometheus::simpleapi::gauge_family_t peer_path_count;
|
||||
extern prometheus::simpleapi::counter_family_t peer_packets;
|
||||
extern prometheus::simpleapi::counter_family_t peer_packet_errors;
|
||||
#endif
|
||||
|
||||
// General Controller Metrics
|
||||
extern prometheus::simpleapi::gauge_metric_t network_count;
|
||||
|
|
|
@ -569,10 +569,10 @@ Network::Network(const RuntimeEnvironment *renv,void *tPtr,uint64_t nwid,void *u
|
|||
_netconfFailure(NETCONF_FAILURE_NONE),
|
||||
_portError(0),
|
||||
_num_multicast_groups{Metrics::network_num_multicast_groups.Add({{"network_id", _nwidStr}})},
|
||||
_incoming_packets_accpeted{Metrics::network_incoming_packets.Add({{"network_id", _nwidStr},{"accepted","yes"}})},
|
||||
_incoming_packets_dropped{Metrics::network_incoming_packets.Add({{"network_id", _nwidStr},{"accepted","no"}})},
|
||||
_outgoing_packets_accepted{Metrics::network_outgoing_packets.Add({{"network_id", _nwidStr},{"accepted","yes"}})},
|
||||
_outgoing_packets_dropped{Metrics::network_outgoing_packets.Add({{"network_id", _nwidStr},{"accepted","no"}})}
|
||||
_incoming_packets_accepted{Metrics::network_packets.Add({{"direction","rx"},{"network_id", _nwidStr},{"accepted","yes"}})},
|
||||
_incoming_packets_dropped{Metrics::network_packets.Add({{"direction","rx"},{"network_id", _nwidStr},{"accepted","no"}})},
|
||||
_outgoing_packets_accepted{Metrics::network_packets.Add({{"direction","tx"},{"network_id", _nwidStr},{"accepted","yes"}})},
|
||||
_outgoing_packets_dropped{Metrics::network_packets.Add({{"direction","tx"},{"network_id", _nwidStr},{"accepted","no"}})}
|
||||
{
|
||||
for(int i=0;i<ZT_NETWORK_MAX_INCOMING_UPDATES;++i) {
|
||||
_incomingConfigChunks[i].ts = 0;
|
||||
|
@ -837,7 +837,7 @@ int Network::filterIncomingPacket(
|
|||
}
|
||||
|
||||
if (accept) {
|
||||
_incoming_packets_accpeted++;
|
||||
_incoming_packets_accepted++;
|
||||
if (cc) {
|
||||
Packet outp(cc,RR->identity.address(),Packet::VERB_EXT_FRAME);
|
||||
outp.append(_id);
|
||||
|
|
|
@ -483,7 +483,7 @@ private:
|
|||
AtomicCounter __refCount;
|
||||
|
||||
prometheus::simpleapi::gauge_metric_t _num_multicast_groups;
|
||||
prometheus::simpleapi::counter_metric_t _incoming_packets_accpeted;
|
||||
prometheus::simpleapi::counter_metric_t _incoming_packets_accepted;
|
||||
prometheus::simpleapi::counter_metric_t _incoming_packets_dropped;
|
||||
prometheus::simpleapi::counter_metric_t _outgoing_packets_accepted;
|
||||
prometheus::simpleapi::counter_metric_t _outgoing_packets_dropped;
|
||||
|
|
|
@ -28,35 +28,37 @@ namespace ZeroTier {
|
|||
|
||||
static unsigned char s_freeRandomByteCounter = 0;
|
||||
|
||||
Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Identity &peerIdentity) :
|
||||
RR(renv),
|
||||
_lastReceive(0),
|
||||
_lastNontrivialReceive(0),
|
||||
_lastTriedMemorizedPath(0),
|
||||
_lastDirectPathPushSent(0),
|
||||
_lastDirectPathPushReceive(0),
|
||||
_lastCredentialRequestSent(0),
|
||||
_lastWhoisRequestReceived(0),
|
||||
_lastCredentialsReceived(0),
|
||||
_lastTrustEstablishedPacketReceived(0),
|
||||
_lastSentFullHello(0),
|
||||
_lastEchoCheck(0),
|
||||
_freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter),
|
||||
_vProto(0),
|
||||
_vMajor(0),
|
||||
_vMinor(0),
|
||||
_vRevision(0),
|
||||
_id(peerIdentity),
|
||||
_directPathPushCutoffCount(0),
|
||||
_echoRequestCutoffCount(0),
|
||||
_localMultipathSupported(false),
|
||||
_lastComputedAggregateMeanLatency(0),
|
||||
_peer_latency{Metrics::peer_latency.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}}, std::vector<uint64_t>{1,3,6,10,30,60,100,300,600,1000})},
|
||||
_alive_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","alive"}})},
|
||||
_dead_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","dead"}})},
|
||||
_incoming_packet{Metrics::peer_incoming_packets.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})},
|
||||
_outgoing_packet{Metrics::peer_outgoing_packets.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})},
|
||||
_packet_errors{Metrics::peer_packet_errors.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
|
||||
Peer::Peer(const RuntimeEnvironment *renv,const Identity &myIdentity,const Identity &peerIdentity)
|
||||
: RR(renv)
|
||||
, _lastReceive(0)
|
||||
, _lastNontrivialReceive(0)
|
||||
, _lastTriedMemorizedPath(0)
|
||||
, _lastDirectPathPushSent(0)
|
||||
, _lastDirectPathPushReceive(0)
|
||||
, _lastCredentialRequestSent(0)
|
||||
, _lastWhoisRequestReceived(0)
|
||||
, _lastCredentialsReceived(0)
|
||||
, _lastTrustEstablishedPacketReceived(0)
|
||||
, _lastSentFullHello(0)
|
||||
, _lastEchoCheck(0)
|
||||
, _freeRandomByte((unsigned char)((uintptr_t)this >> 4) ^ ++s_freeRandomByteCounter)
|
||||
, _vProto(0)
|
||||
, _vMajor(0)
|
||||
, _vMinor(0)
|
||||
, _vRevision(0)
|
||||
, _id(peerIdentity)
|
||||
, _directPathPushCutoffCount(0)
|
||||
, _echoRequestCutoffCount(0)
|
||||
, _localMultipathSupported(false)
|
||||
, _lastComputedAggregateMeanLatency(0)
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
, _peer_latency{Metrics::peer_latency.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}}, std::vector<uint64_t>{1,3,6,10,30,60,100,300,600,1000})}
|
||||
, _alive_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","alive"}})}
|
||||
, _dead_path_count{Metrics::peer_path_count.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())},{"status","dead"}})}
|
||||
, _incoming_packet{Metrics::peer_packets.Add({{"direction", "rx"},{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
|
||||
, _outgoing_packet{Metrics::peer_packets.Add({{"direction", "tx"},{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
|
||||
, _packet_errors{Metrics::peer_packet_errors.Add({{"node_id", OSUtils::nodeIDStr(peerIdentity.address().toInt())}})}
|
||||
#endif
|
||||
{
|
||||
if (!myIdentity.agree(peerIdentity,_key)) {
|
||||
throw ZT_EXCEPTION_INVALID_ARGUMENT;
|
||||
|
@ -97,7 +99,9 @@ void Peer::received(
|
|||
default:
|
||||
break;
|
||||
}
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
_incoming_packet++;
|
||||
#endif
|
||||
recordIncomingPacket(path, packetId, payloadLength, verb, flowId, now);
|
||||
|
||||
if (trustEstablished) {
|
||||
|
@ -569,6 +573,7 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
|
|||
deletionOccurred = false;
|
||||
}
|
||||
}
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
uint16_t alive_path_count_tmp = 0, dead_path_count_tmp = 0;
|
||||
for(unsigned int i=0;i<ZT_MAX_PEER_NETWORK_PATHS;++i) {
|
||||
if (_paths[i].p) {
|
||||
|
@ -582,8 +587,11 @@ unsigned int Peer::doPingAndKeepalive(void *tPtr,int64_t now)
|
|||
}
|
||||
_alive_path_count = alive_path_count_tmp;
|
||||
_dead_path_count = dead_path_count_tmp;
|
||||
#endif
|
||||
}
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
_peer_latency.Observe(latency(now));
|
||||
#endif
|
||||
return sent;
|
||||
}
|
||||
|
||||
|
@ -658,7 +666,9 @@ void Peer::resetWithinScope(void *tPtr,InetAddress::IpScope scope,int inetAddres
|
|||
void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t packetId,
|
||||
uint16_t payloadLength, const Packet::Verb verb, const int32_t flowId, int64_t now)
|
||||
{
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
_outgoing_packet++;
|
||||
#endif
|
||||
if (_localMultipathSupported && _bond) {
|
||||
_bond->recordOutgoingPacket(path, packetId, payloadLength, verb, flowId, now);
|
||||
}
|
||||
|
@ -666,7 +676,9 @@ void Peer::recordOutgoingPacket(const SharedPtr<Path> &path, const uint64_t pack
|
|||
|
||||
void Peer::recordIncomingInvalidPacket(const SharedPtr<Path>& path)
|
||||
{
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
_packet_errors++;
|
||||
#endif
|
||||
if (_localMultipathSupported && _bond) {
|
||||
_bond->recordIncomingInvalidPacket(path);
|
||||
}
|
||||
|
|
|
@ -599,12 +599,14 @@ private:
|
|||
|
||||
SharedPtr<Bond> _bond;
|
||||
|
||||
#ifndef ZT_NO_PEER_METRICS
|
||||
prometheus::Histogram<uint64_t> &_peer_latency;
|
||||
prometheus::simpleapi::gauge_metric_t _alive_path_count;
|
||||
prometheus::simpleapi::gauge_metric_t _dead_path_count;
|
||||
prometheus::simpleapi::counter_metric_t _incoming_packet;
|
||||
prometheus::simpleapi::counter_metric_t _outgoing_packet;
|
||||
prometheus::simpleapi::counter_metric_t _packet_errors;
|
||||
#endif
|
||||
};
|
||||
|
||||
} // namespace ZeroTier
|
||||
|
|
Loading…
Add table
Reference in a new issue