Merge branch 'dev' into patch-1

Grant Limberg 2023-07-06 12:45:43 -07:00 committed by GitHub
commit 6c3c2277c6
410 changed files with 41000 additions and 5057 deletions


@ -19,17 +19,14 @@ jobs:
components: rustfmt, clippy
- name: Set up cargo cache
uses: actions/cache@v3
uses: Swatinem/rust-cache@v2
continue-on-error: false
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-
key: ${{ runner.os }}-cargo-${{ hashFiles('zeroidc//Cargo.lock') }}
shared-key: ${{ runner.os }}-cargo-
workspaces: |
zeroidc/
- name: make
run: make
- name: selftest
@ -54,17 +51,14 @@ jobs:
override: true
components: rustfmt, clippy
- name: Set up cargo cache
uses: actions/cache@v3
uses: Swatinem/rust-cache@v2
continue-on-error: false
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-
key: ${{ runner.os }}-cargo-${{ hashFiles('zeroidc//Cargo.lock') }}
shared-key: ${{ runner.os }}-cargo-
workspaces: |
zeroidc/
- name: make
run: make
- name: selftest
@ -89,19 +83,16 @@ jobs:
override: true
components: rustfmt, clippy
- name: Set up cargo cache
uses: actions/cache@v3
uses: Swatinem/rust-cache@v2
continue-on-error: false
with:
path: |
~/.cargo/bin/
~/.cargo/registry/index/
~/.cargo/registry/cache/
~/.cargo/git/db/
target/
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
restore-keys: ${{ runner.os }}-cargo-
key: ${{ runner.os }}-cargo-${{ hashFiles('zeroidc//Cargo.lock') }}
shared-key: ${{ runner.os }}-cargo-
workspaces: |
zeroidc/
- name: setup msbuild
uses: microsoft/setup-msbuild@v1.1.3
- name: msbuild
run: |
msbuild windows\ZeroTierOne.sln /m /p:Configuration=Release /property:Platform=x64 /t:ZeroTierOne:Rebuild
msbuild windows\ZeroTierOne.sln /m /p:Configuration=Release /property:Platform=x64 /t:ZeroTierOne

.github/workflows/report.sh (vendored executable file, 15 lines added)

@ -0,0 +1,15 @@
#!/bin/bash
################################################################################
# Set exit code depending on tool reports #
################################################################################
DEFINITELY_LOST=$(cat *test-results/*summary.json | jq .num_definite_bytes_lost)
cat *test-results/*summary.json
echo -e "\nBytes of memory definitely lost: $DEFINITELY_LOST"
if [[ "$DEFINITELY_LOST" -gt 0 ]]; then
exit 1
fi

.github/workflows/validate-1m-linux.sh (vendored executable file, 434 lines added)

@ -0,0 +1,434 @@
#!/bin/bash
# This test script joins Earth and pokes some stuff
TEST_NETWORK=8056c2e21c000001
RUN_LENGTH=60
TEST_FINISHED=false
ZTO_VER=$(git describe --tags $(git rev-list --tags --max-count=1))
ZTO_COMMIT=$(git rev-parse HEAD)
ZTO_COMMIT_SHORT=$(git rev-parse --short HEAD)
TEST_DIR_PREFIX="$ZTO_VER-$ZTO_COMMIT_SHORT-test-results"
echo "Performing test on: $ZTO_VER-$ZTO_COMMIT_SHORT"
TEST_FILEPATH_PREFIX="$TEST_DIR_PREFIX/$ZTO_COMMIT_SHORT"
mkdir -p "$TEST_DIR_PREFIX"
################################################################################
# Multi-node connectivity and performance test #
################################################################################
main() {
echo -e "\nRunning test for $RUN_LENGTH seconds"
NS1="ip netns exec ns1"
NS2="ip netns exec ns2"
ZT1="$NS1 ./zerotier-cli -p9996 -D$(pwd)/node1"
# Specify custom port on one node to ensure that feature works
ZT2="$NS2 ./zerotier-cli -p9997 -D$(pwd)/node2"
echo -e "\nSetting up network namespaces..."
echo "Setting up ns1"
ip netns add ns1
$NS1 ip link set dev lo up
ip link add veth0 type veth peer name veth1
ip link set veth1 netns ns1
ip addr add 192.168.0.1/24 dev veth0
ip link set dev veth0 up
$NS1 ip addr add 192.168.0.2/24 dev veth1
$NS1 ip link set dev veth1 up
# Add default route
$NS1 ip route add default via 192.168.0.1
iptables -t nat -A POSTROUTING -s 192.168.0.0/255.255.255.0 \
-o eth0 -j MASQUERADE
iptables -A FORWARD -i eth0 -o veth0 -j ACCEPT
iptables -A FORWARD -o eth0 -i veth0 -j ACCEPT
echo "Setting up ns2"
ip netns add ns2
$NS2 ip link set dev lo up
ip link add veth2 type veth peer name veth3
ip link set veth3 netns ns2
ip addr add 192.168.1.1/24 dev veth2
ip link set dev veth2 up
$NS2 ip addr add 192.168.1.2/24 dev veth3
$NS2 ip link set dev veth3 up
$NS2 ip route add default via 192.168.1.1
iptables -t nat -A POSTROUTING -s 192.168.1.0/255.255.255.0 \
-o eth0 -j MASQUERADE
iptables -A FORWARD -i eth0 -o veth2 -j ACCEPT
iptables -A FORWARD -o eth0 -i veth2 -j ACCEPT
# Allow forwarding
sysctl -w net.ipv4.ip_forward=1
echo -e "\nPing from host to namespaces"
ping -c 3 192.168.0.1
ping -c 3 192.168.1.1
echo -e "\nPing from namespace to host"
$NS1 ping -c 3 192.168.0.1
$NS1 ping -c 3 192.168.0.1
$NS2 ping -c 3 192.168.0.2
$NS2 ping -c 3 192.168.0.2
echo -e "\nPing from ns1 to ns2"
$NS1 ping -c 3 192.168.0.1
echo -e "\nPing from ns2 to ns1"
$NS2 ping -c 3 192.168.0.1
################################################################################
# Memory Leak Check #
################################################################################
FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
echo -e "\nStarting a ZeroTier instance in each namespace..."
time_test_start=$(date +%s)
# Spam the CLI as ZeroTier is starting
spam_cli 100
echo "Starting memory leak check"
$NS1 sudo valgrind --demangle=yes --exit-on-first-error=yes \
--error-exitcode=1 \
--xml=yes \
--xml-file=$FILENAME_MEMORY_LOG \
--leak-check=full \
./zerotier-one node1 -p9996 -U >>node_1.log 2>&1 &
# Second instance, not run in memory profiler
$NS2 sudo ./zerotier-one node2 -U -p9997 >>node_2.log 2>&1 &
################################################################################
# Online Check #
################################################################################
echo "Waiting for ZeroTier to come online before attempting test..."
MAX_WAIT_SECS="${MAX_WAIT_SECS:-120}"
node1_online=false
node2_online=false
both_instances_online=false
time_zt_node1_start=$(date +%s)
time_zt_node2_start=$(date +%s)
for ((s = 0; s <= MAX_WAIT_SECS; s++)); do
node1_online="$($ZT1 -j info | jq '.online' 2>/dev/null)"
node2_online="$($ZT2 -j info | jq '.online' 2>/dev/null)"
echo "Checking for online status: try #$s, node1:$node1_online, node2:$node2_online"
if [[ "$node1_online" == "true" ]]; then
time_zt_node1_online=$(date +%s)
fi
if [[ "$node2_online" == "true" ]]; then
time_zt_node2_online=$(date +%s)
fi
if [[ "$node2_online" == "true" && "$node1_online" == "true" ]]; then
both_instances_online=true
break
fi
sleep 1
done
echo -e "\n\nContents of ZeroTier home paths:"
ls -lga node1
tree node1
ls -lga node2
tree node2
echo -e "\n\nRunning ZeroTier processes:"
echo -e "\nNode 1:"
$NS1 ps aux | grep zerotier-one
echo -e "\nNode 2:"
$NS2 ps aux | grep zerotier-one
echo -e "\n\nStatus of each instance:"
echo -e "\n\nNode 1:"
$ZT1 status
echo -e "\n\nNode 2:"
$ZT2 status
if [[ "$both_instances_online" != "true" ]]; then
echo "One or more instances of ZeroTier failed to come online. Aborting test."
exit 1
fi
echo -e "\nJoining networks"
$ZT1 join $TEST_NETWORK
$ZT2 join $TEST_NETWORK
sleep 10
node1_ip4=$($ZT1 get $TEST_NETWORK ip4)
node2_ip4=$($ZT2 get $TEST_NETWORK ip4)
echo "node1_ip4=$node1_ip4"
echo "node2_ip4=$node2_ip4"
echo -e "\nPinging each node"
PING12_FILENAME="$TEST_FILEPATH_PREFIX-ping-1-to-2.txt"
PING21_FILENAME="$TEST_FILEPATH_PREFIX-ping-2-to-1.txt"
$NS1 ping -c 16 $node2_ip4 >$PING12_FILENAME
$NS2 ping -c 16 $node1_ip4 >$PING21_FILENAME
# Parse ping statistics; fall back to 100% loss if parsing fails
ping_loss_percent_1_to_2=$(grep "packet loss" "$PING12_FILENAME" | awk '{print $6}' | sed 's/%//')
ping_loss_percent_2_to_1=$(grep "packet loss" "$PING21_FILENAME" | awk '{print $6}' | sed 's/%//')
ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
# Normalize loss value
ping_loss_percent_1_to_2=$(echo "scale=2; $ping_loss_percent_1_to_2/100.0" | bc)
ping_loss_percent_2_to_1=$(echo "scale=2; $ping_loss_percent_2_to_1/100.0" | bc)
################################################################################
# CLI Check #
################################################################################
echo "Testing basic CLI functionality..."
spam_cli 10
$ZT1 join $TEST_NETWORK
$ZT1 -h
$ZT1 -v
$ZT1 status
$ZT1 info
$ZT1 listnetworks
$ZT1 peers
$ZT1 listpeers
$ZT1 -j status
$ZT1 -j info
$ZT1 -j listnetworks
$ZT1 -j peers
$ZT1 -j listpeers
$ZT1 dump
$ZT1 get $TEST_NETWORK allowDNS
$ZT1 get $TEST_NETWORK allowDefault
$ZT1 get $TEST_NETWORK allowGlobal
$ZT1 get $TEST_NETWORK allowManaged
$ZT1 get $TEST_NETWORK bridge
$ZT1 get $TEST_NETWORK broadcastEnabled
$ZT1 get $TEST_NETWORK dhcp
$ZT1 get $TEST_NETWORK id
$ZT1 get $TEST_NETWORK mac
$ZT1 get $TEST_NETWORK mtu
$ZT1 get $TEST_NETWORK name
$ZT1 get $TEST_NETWORK netconfRevision
$ZT1 get $TEST_NETWORK nwid
$ZT1 get $TEST_NETWORK portDeviceName
$ZT1 get $TEST_NETWORK portError
$ZT1 get $TEST_NETWORK status
$ZT1 get $TEST_NETWORK type
# Test an invalid command
$ZT1 get $TEST_NETWORK derpderp
# TODO: Validate JSON
################################################################################
# Performance Test #
################################################################################
FILENAME_PERF_JSON="$TEST_FILEPATH_PREFIX-iperf.json"
echo -e "\nBeginning performance test:"
echo -e "\nStarting server:"
echo "$NS1 iperf3 -s &"
sleep 1
echo -e "\nStarting client:"
sleep 1
echo "$NS2 iperf3 --json -c $node1_ip4 > $FILENAME_PERF_JSON"
cat $FILENAME_PERF_JSON
################################################################################
# Collect ZeroTier dump files #
################################################################################
echo -e "\nCollecting ZeroTier dump files"
node1_id=$($ZT1 -j status | jq -r .address)
node2_id=$($ZT2 -j status | jq -r .address)
$ZT1 dump
mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node1_id.txt"
$ZT2 dump
mv zerotier_dump.txt "$TEST_FILEPATH_PREFIX-node-dump-$node2_id.txt"
################################################################################
# Let ZeroTier idle long enough for various timers #
################################################################################
echo -e "\nIdling ZeroTier for $RUN_LENGTH seconds..."
sleep $RUN_LENGTH
echo -e "\nLeaving networks"
$ZT1 leave $TEST_NETWORK
$ZT2 leave $TEST_NETWORK
sleep 5
################################################################################
# Stop test #
################################################################################
echo -e "\nStopping memory check..."
sudo pkill -15 -f valgrind
sleep 10
time_test_end=$(date +%s)
################################################################################
# Rename ZeroTier stdout/stderr logs #
################################################################################
mv node_1.log "$TEST_FILEPATH_PREFIX-node-log-$node1_id.txt"
mv node_2.log "$TEST_FILEPATH_PREFIX-node-log-$node2_id.txt"
################################################################################
# Generate report #
################################################################################
cat $FILENAME_MEMORY_LOG
DEFINITELY_LOST=$(xmlstarlet sel -t -v '/valgrindoutput/error/xwhat' \
$FILENAME_MEMORY_LOG | grep "definitely" | awk '{print $1;}')
POSSIBLY_LOST=$(xmlstarlet sel -t -v '/valgrindoutput/error/xwhat' \
$FILENAME_MEMORY_LOG | grep "possibly" | awk '{print $1;}')
################################################################################
# Generate coverage report artifact and summary #
################################################################################
FILENAME_COVERAGE_JSON="$TEST_FILEPATH_PREFIX-coverage.json"
FILENAME_COVERAGE_HTML="$TEST_FILEPATH_PREFIX-coverage.html"
echo -e "\nGenerating coverage test report..."
gcovr -r . --exclude ext --json-summary $FILENAME_COVERAGE_JSON \
--html >$FILENAME_COVERAGE_HTML
cat $FILENAME_COVERAGE_JSON
COVERAGE_LINE_COVERED=$(cat $FILENAME_COVERAGE_JSON | jq .line_covered)
COVERAGE_LINE_TOTAL=$(cat $FILENAME_COVERAGE_JSON | jq .line_total)
COVERAGE_LINE_PERCENT=$(cat $FILENAME_COVERAGE_JSON | jq .line_percent)
COVERAGE_LINE_COVERED="${COVERAGE_LINE_COVERED:-0}"
COVERAGE_LINE_TOTAL="${COVERAGE_LINE_TOTAL:-0}"
COVERAGE_LINE_PERCENT="${COVERAGE_LINE_PERCENT:-0}"
################################################################################
# Default values #
################################################################################
DEFINITELY_LOST="${DEFINITELY_LOST:-0}"
POSSIBLY_LOST="${POSSIBLY_LOST:-0}"
################################################################################
# Summarize and emit json for trend reporting #
################################################################################
FILENAME_SUMMARY="$TEST_FILEPATH_PREFIX-summary.json"
time_length_test=$((time_test_end - time_test_start))
time_length_zt_node1_online=$((time_zt_node1_online - time_zt_node1_start))
time_length_zt_node2_online=$((time_zt_node2_online - time_zt_node2_start))
#time_length_zt_join=$((time_zt_join_end-time_zt_join_start))
#time_length_zt_leave=$((time_zt_leave_end-time_zt_leave_start))
#time_length_zt_can_still_ping=$((time_zt_can_still_ping-time_zt_leave_start))
summary=$(
cat <<EOF
{
"version":"$ZTO_VER",
"commit":"$ZTO_COMMIT",
"arch_m":"$(uname -m)",
"arch_a":"$(uname -a)",
"time_length_test":$time_length_test,
"time_length_zt_node1_online":$time_length_zt_node1_online,
"time_length_zt_node2_online":$time_length_zt_node2_online,
"num_possible_bytes_lost": $POSSIBLY_LOST,
"num_definite_bytes_lost": $DEFINITELY_LOST,
"num_incorrect_settings": $POSSIBLY_LOST,
"num_bad_formattings": $POSSIBLY_LOST,
"percent_coverage_branches": $POSSIBLY_LOST,
"coverage_lines_covered": $COVERAGE_LINE_COVERED,
"coverage_lines_total": $COVERAGE_LINE_TOTAL,
"coverage_lines_percent": $COVERAGE_LINE_PERCENT,
"ping_loss_percent_1_to_2": $ping_loss_percent_1_to_2,
"ping_loss_percent_2_to_1": $ping_loss_percent_2_to_1,
"mean_latency_ping_random": $POSSIBLY_LOST,
"mean_latency_ping_netns": $POSSIBLY_LOST,
"mean_pdv_random": $POSSIBLY_LOST,
"mean_pdv_netns": $POSSIBLY_LOST,
"mean_perf_netns": $POSSIBLY_LOST
}
EOF
)
echo "$summary" >"$FILENAME_SUMMARY"
cat $FILENAME_SUMMARY
}
################################################################################
# CLI Check #
################################################################################
spam_cli() {
echo "Spamming CLI..."
# Rapidly spam the CLI with status queries, then joins and leaves
MAX_TRIES="${1:-10}"
for ((s = 0; s <= MAX_TRIES; s++)); do
$ZT1 status
$ZT2 status
sleep 0.1
done
SPAM_TRIES=128
for ((s = 0; s <= SPAM_TRIES; s++)); do
$ZT1 join $TEST_NETWORK
done
for ((s = 0; s <= SPAM_TRIES; s++)); do
$ZT1 leave $TEST_NETWORK
done
for ((s = 0; s <= SPAM_TRIES; s++)); do
$ZT1 leave $TEST_NETWORK
$ZT1 join $TEST_NETWORK
done
}
main "$@"

.github/workflows/validate.yml (vendored file, 56 lines added)

@ -0,0 +1,56 @@
on:
  push:
  workflow_dispatch:
jobs:
  build_ubuntu:
    runs-on: ubuntu-latest
    steps:
      - name: gitconfig
        run: |
          git config --global core.autocrlf input
      - name: checkout
        uses: actions/checkout@v3
        with:
          fetch-depth: 0
      - name: Install Rust
        uses: actions-rs/toolchain@v1
        with:
          toolchain: stable
          target: x86_64-unknown-linux-gnu
          override: true
          components: rustfmt, clippy
      - name: Set up cargo cache
        uses: Swatinem/rust-cache@v2
        continue-on-error: false
        with:
          key: ${{ runner.os }}-cargo-${{ hashFiles('zeroidc//Cargo.lock') }}
          shared-key: ${{ runner.os }}-cargo-
          workspaces: |
            zeroidc/
      - name: validate-1m-linux
        env:
          CC: 'gcc'
          CXX: 'g++'
          BRANCH: ${{ github.ref_name }}
        run: |
          sudo apt install -y valgrind xmlstarlet gcovr iperf3 tree
          make one ZT_COVERAGE=1 ZT_TRACE=1
          sudo chmod +x ./.github/workflows/validate-1m-linux.sh
          sudo ./.github/workflows/validate-1m-linux.sh
      - name: Archive test results
        uses: actions/upload-artifact@v3
        with:
          name: ${{github.sha}}-test-results
          path: "*test-results*"
      - name: final-report
        run: |
          sudo chmod +x ./.github/workflows/report.sh
          sudo ./.github/workflows/report.sh


@ -37,7 +37,6 @@ The base path contains the ZeroTier One service main entry point (`one.cpp`), se
- `ext/`: third party libraries, binaries that we ship for convenience on some platforms (Mac and Windows), and installation support files.
- `include/`: include files for the ZeroTier core.
- `java/`: a JNI wrapper used with our Android mobile app. (The whole Android app is not open source but may be made so in the future.)
- `macui/`: a Macintosh menu-bar app for controlling ZeroTier One, written in Objective C.
- `node/`: the ZeroTier virtual Ethernet switch core, which is designed to be entirely separate from the rest of the code and able to be built as a stand-alone OS-independent library. Note to developers: do not use C++11 features in here, since we want this to build on old embedded platforms that lack C++11 support. C++11 can be used elsewhere.
- `osdep/`: code to support and integrate with OSes, including platform-specific stuff only built for certain targets.
- `rule-compiler/`: JavaScript rules language compiler for defining network-level rules.
@ -105,8 +104,69 @@ On CentOS check `/etc/sysconfig/iptables` for IPTables rules. For other distribu
ZeroTier One peers will automatically locate each other and communicate directly over a local wired LAN *if UDP port 9993 inbound is open*. If that port is filtered, they won't be able to see each other's LAN announcement packets. If you're experiencing poor performance between devices on the same physical network, check their firewall settings. Without LAN auto-location peers must attempt "loopback" NAT traversal, which sometimes fails and in any case requires that every packet traverse your external router twice.
Users behind certain types of firewalls and "symmetric" NAT devices may not able able to connect to external peers directly at all. ZeroTier has limited support for port prediction and will *attempt* to traverse symmetric NATs, but this doesn't always work. If P2P connectivity fails you'll be bouncing UDP packets off our relay servers resulting in slower performance. Some NAT router(s) have a configurable NAT mode, and setting this to "full cone" will eliminate this problem. If you do this you may also see a magical improvement for things like VoIP phones, Skype, BitTorrent, WebRTC, certain games, etc., since all of these use NAT traversal techniques similar to ours.
Users behind certain types of firewalls and "symmetric" NAT devices may not be able to connect to external peers directly at all. ZeroTier has limited support for port prediction and will *attempt* to traverse symmetric NATs, but this doesn't always work. If P2P connectivity fails you'll be bouncing UDP packets off our relay servers resulting in slower performance. Some NAT router(s) have a configurable NAT mode, and setting this to "full cone" will eliminate this problem. If you do this you may also see a magical improvement for things like VoIP phones, Skype, BitTorrent, WebRTC, certain games, etc., since all of these use NAT traversal techniques similar to ours.
If a firewall between you and the Internet blocks ZeroTier's UDP traffic, you will fall back to last-resort TCP tunneling to rootservers over port 443 (https impersonation). This will work almost anywhere but is *very slow* compared to UDP or direct peer to peer connectivity.
Additional help can be found in our [knowledge base](https://zerotier.atlassian.net/wiki/spaces/SD/overview).
### Prometheus Metrics
Prometheus Metrics are available at the `/metrics` API endpoint. This endpoint is protected by an API key stored in `authtoken.secret` because of the possibility of information leakage. Information that could be gleaned from the metrics includes joined networks and the peers your instance is talking to.
Access control is via the ZeroTier control interface itself and `authtoken.secret`. This can be sent as the `X-ZT1-Auth` HTTP header field or appended to the URL as `?auth=<token>`. You can see the current metrics via `cURL` with the following commands:
// Linux
curl -H "X-ZT1-Auth: $(sudo cat /var/lib/zerotier-one/authtoken.secret)" http://localhost:9993/metrics
// macOS
curl -H "X-XT1-Auth: $(sudo cat /Library/Application\ Support/ZeroTier/One/authtoken.secret)" http://localhost:9993/metrics
// Windows PowerShell (Admin)
Invoke-RestMethod -Headers @{'X-ZT1-Auth' = "$(Get-Content C:\ProgramData\ZeroTier\One\authtoken.secret)"; } -Uri http://localhost:9993/metrics
To configure a scrape job in Prometheus on the machine ZeroTier is running on, add this to your Prometheus `scrape_config`:
- job_name: zerotier-one
  honor_labels: true
  scrape_interval: 15s
  metrics_path: /metrics
  static_configs:
    - targets:
        - 127.0.0.1:9993
      labels:
        group: zerotier-one
  params:
    auth:
      - $YOUR_AUTHTOKEN_SECRET
If your Prometheus instance is remote from the machine your ZeroTier instance runs on, you'll have to edit your `local.conf` file to allow remote access to the API control port. If your local LAN is `10.0.0.0/24`, edit your `local.conf` as follows:
{
  "settings": {
    "allowManagementFrom": ["10.0.0.0/24"]
  }
}
Substitute your actual network IP ranges as necessary.
It's also possible to access the metrics & control port over the ZeroTier network itself via the same method shown above. Just add the address range of your ZeroTier network to the list. NOTE: Using this method means that anyone with your auth token can control your ZeroTier instance, including leaving & joining other networks.
If neither of these methods is desirable, it is probably possible to distribute metrics via [Prometheus Proxy](https://github.com/pambrose/prometheus-proxy) or some other tool. Note: we have not tested this internally, but it will probably work with the correct configuration.
#### Available Metrics
| Metric Name | Labels | Metric Type | Description |
| --- | --- | --- | --- |
| zt_packet | packet_type, direction | Counter | ZeroTier packet type counts |
| zt_packet_error | error_type, direction | Counter | ZeroTier packet errors |
| zt_data | protocol, direction | Counter | number of bytes ZeroTier has transmitted or received |
| zt_num_networks | | Gauge | number of networks this instance is joined to |
| zt_network_multicast_groups_subscribed | network_id | Gauge | number of multicast groups networks are subscribed to |
| zt_network_packets | network_id, direction | Counter | number of incoming/outgoing packets per network |
| zt_peer_latency | node_id | Histogram | peer latency (ms) |
| zt_peer_path_count | node_id, status | Gauge | number of paths to peer |
| zt_peer_packets | node_id, direction | Counter | number of packets to/from a peer |
| zt_peer_packet_errors | node_id | Counter | number of incoming packet errors from a peer |
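Metrics are emitted in the standard Prometheus text exposition format. As a rough illustration of what a scrape returns (metric names from the table above; the label values and counts here are hypothetical):
zt_num_networks 2
zt_packet{packet_type="hello",direction="rx"} 4012
zt_data{protocol="udp",direction="tx"} 383936
zt_peer_packets{node_id="a10dccea52",direction="rx"} 1217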
If there are other metrics you'd like to see tracked, ask us in an Issue or send us a Pull Request!


@ -108,16 +108,17 @@ DB::~DB() {}
bool DB::get(const uint64_t networkId,nlohmann::json &network)
{
waitForReady();
Metrics::db_get_network++;
std::shared_ptr<_Network> nw;
{
std::lock_guard<std::mutex> l(_networks_l);
std::shared_lock<std::shared_mutex> l(_networks_l);
auto nwi = _networks.find(networkId);
if (nwi == _networks.end())
return false;
nw = nwi->second;
}
{
std::lock_guard<std::mutex> l2(nw->lock);
std::shared_lock<std::shared_mutex> l2(nw->lock);
network = nw->config;
}
return true;
@ -126,16 +127,17 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network)
bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member)
{
waitForReady();
Metrics::db_get_network_and_member++;
std::shared_ptr<_Network> nw;
{
std::lock_guard<std::mutex> l(_networks_l);
std::shared_lock<std::shared_mutex> l(_networks_l);
auto nwi = _networks.find(networkId);
if (nwi == _networks.end())
return false;
nw = nwi->second;
}
{
std::lock_guard<std::mutex> l2(nw->lock);
std::shared_lock<std::shared_mutex> l2(nw->lock);
network = nw->config;
auto m = nw->members.find(memberId);
if (m == nw->members.end())
@ -148,16 +150,17 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t mem
bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member,NetworkSummaryInfo &info)
{
waitForReady();
Metrics::db_get_network_and_member_and_summary++;
std::shared_ptr<_Network> nw;
{
std::lock_guard<std::mutex> l(_networks_l);
std::shared_lock<std::shared_mutex> l(_networks_l);
auto nwi = _networks.find(networkId);
if (nwi == _networks.end())
return false;
nw = nwi->second;
}
{
std::lock_guard<std::mutex> l2(nw->lock);
std::shared_lock<std::shared_mutex> l2(nw->lock);
network = nw->config;
_fillSummaryInfo(nw,info);
auto m = nw->members.find(memberId);
@ -171,16 +174,17 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network,const uint64_t mem
bool DB::get(const uint64_t networkId,nlohmann::json &network,std::vector<nlohmann::json> &members)
{
waitForReady();
Metrics::db_get_member_list++;
std::shared_ptr<_Network> nw;
{
std::lock_guard<std::mutex> l(_networks_l);
std::shared_lock<std::shared_mutex> l(_networks_l);
auto nwi = _networks.find(networkId);
if (nwi == _networks.end())
return false;
nw = nwi->second;
}
{
std::lock_guard<std::mutex> l2(nw->lock);
std::shared_lock<std::shared_mutex> l2(nw->lock);
network = nw->config;
for(auto m=nw->members.begin();m!=nw->members.end();++m) {
members.push_back(m->second);
@ -192,13 +196,15 @@ bool DB::get(const uint64_t networkId,nlohmann::json &network,std::vector<nlohma
void DB::networks(std::set<uint64_t> &networks)
{
waitForReady();
std::lock_guard<std::mutex> l(_networks_l);
Metrics::db_get_member_list++;
std::shared_lock<std::shared_mutex> l(_networks_l);
for(auto n=_networks.begin();n!=_networks.end();++n)
networks.insert(n->first);
}
void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners)
{
Metrics::db_member_change++;
uint64_t memberId = 0;
uint64_t networkId = 0;
bool isAuth = false;
@ -210,14 +216,14 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
networkId = OSUtils::jsonIntHex(old["nwid"],0ULL);
if ((memberId)&&(networkId)) {
{
std::lock_guard<std::mutex> l(_networks_l);
std::unique_lock<std::shared_mutex> l(_networks_l);
auto nw2 = _networks.find(networkId);
if (nw2 != _networks.end()) {
nw = nw2->second;
}
}
if (nw) {
std::lock_guard<std::mutex> l(nw->lock);
std::unique_lock<std::shared_mutex> l(nw->lock);
if (OSUtils::jsonBool(old["activeBridge"],false)) {
nw->activeBridgeMembers.erase(memberId);
}
@ -247,7 +253,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
networkId = OSUtils::jsonIntHex(memberConfig["nwid"],0ULL);
if ((!memberId)||(!networkId))
return;
std::lock_guard<std::mutex> l(_networks_l);
std::unique_lock<std::shared_mutex> l(_networks_l);
std::shared_ptr<_Network> &nw2 = _networks[networkId];
if (!nw2)
nw2.reset(new _Network);
@ -255,7 +261,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
}
{
std::lock_guard<std::mutex> l(nw->lock);
std::unique_lock<std::shared_mutex> l(nw->lock);
nw->members[memberId] = memberConfig;
@ -288,18 +294,18 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
}
if (notifyListeners) {
std::lock_guard<std::mutex> ll(_changeListeners_l);
std::unique_lock<std::shared_mutex> ll(_changeListeners_l);
for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
(*i)->onNetworkMemberUpdate(this,networkId,memberId,memberConfig);
}
}
} else if (memberId) {
if (nw) {
std::lock_guard<std::mutex> l(nw->lock);
std::unique_lock<std::shared_mutex> l(nw->lock);
nw->members.erase(memberId);
}
if (networkId) {
std::lock_guard<std::mutex> l(_networks_l);
std::unique_lock<std::shared_mutex> l(_networks_l);
auto er = _networkByMember.equal_range(memberId);
for(auto i=er.first;i!=er.second;++i) {
if (i->second == networkId) {
@ -329,7 +335,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
}
if ((notifyListeners)&&((wasAuth)&&(!isAuth)&&(networkId)&&(memberId))) {
std::lock_guard<std::mutex> ll(_changeListeners_l);
std::unique_lock<std::shared_mutex> ll(_changeListeners_l);
for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
(*i)->onNetworkMemberDeauthorize(this,networkId,memberId);
}
@ -338,6 +344,7 @@ void DB::_memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool no
void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool notifyListeners)
{
Metrics::db_network_change++;
if (notifyListeners) {
if (old.is_object() && old.contains("id") && networkConfig.is_object() && networkConfig.contains("id")) {
Metrics::network_changes++;
@ -354,18 +361,18 @@ void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool
if (networkId) {
std::shared_ptr<_Network> nw;
{
std::lock_guard<std::mutex> l(_networks_l);
std::unique_lock<std::shared_mutex> l(_networks_l);
std::shared_ptr<_Network> &nw2 = _networks[networkId];
if (!nw2)
nw2.reset(new _Network);
nw = nw2;
}
{
std::lock_guard<std::mutex> l2(nw->lock);
std::unique_lock<std::shared_mutex> l2(nw->lock);
nw->config = networkConfig;
}
if (notifyListeners) {
std::lock_guard<std::mutex> ll(_changeListeners_l);
std::unique_lock<std::shared_mutex> ll(_changeListeners_l);
for(auto i=_changeListeners.begin();i!=_changeListeners.end();++i) {
(*i)->onNetworkUpdate(this,networkId,networkConfig);
}
@ -375,7 +382,7 @@ void DB::_networkChanged(nlohmann::json &old,nlohmann::json &networkConfig,bool
const std::string ids = old["id"];
const uint64_t networkId = Utils::hexStrToU64(ids.c_str());
if (networkId) {
std::lock_guard<std::mutex> l(_networks_l);
std::unique_lock<std::shared_mutex> l(_networks_l);
_networks.erase(networkId);
}
}


@ -29,7 +29,7 @@
#include <unordered_set>
#include <vector>
#include <atomic>
#include <mutex>
#include <shared_mutex>
#include <set>
#include <map>
@ -109,7 +109,7 @@ public:
inline bool hasNetwork(const uint64_t networkId) const
{
std::lock_guard<std::mutex> l(_networks_l);
std::shared_lock<std::shared_mutex> l(_networks_l);
return (_networks.find(networkId) != _networks.end());
}
@ -124,7 +124,7 @@ public:
inline void each(F f)
{
nlohmann::json nullJson;
std::lock_guard<std::mutex> lck(_networks_l);
std::unique_lock<std::shared_mutex> lck(_networks_l);
for(auto nw=_networks.begin();nw!=_networks.end();++nw) {
f(nw->first,nw->second->config,0,nullJson); // first provide network with 0 for member ID
for(auto m=nw->second->members.begin();m!=nw->second->members.end();++m) {
@ -142,7 +142,7 @@ public:
inline void addListener(DB::ChangeListener *const listener)
{
std::lock_guard<std::mutex> l(_changeListeners_l);
std::unique_lock<std::shared_mutex> l(_changeListeners_l);
_changeListeners.push_back(listener);
}
@ -178,7 +178,7 @@ protected:
std::unordered_set<uint64_t> authorizedMembers;
std::unordered_set<InetAddress,InetAddress::Hasher> allocatedIps;
int64_t mostRecentDeauthTime;
std::mutex lock;
std::shared_mutex lock;
};
virtual void _memberChanged(nlohmann::json &old,nlohmann::json &memberConfig,bool notifyListeners);
@ -188,8 +188,8 @@ protected:
std::vector<DB::ChangeListener *> _changeListeners;
std::unordered_map< uint64_t,std::shared_ptr<_Network> > _networks;
std::unordered_multimap< uint64_t,uint64_t > _networkByMember;
mutable std::mutex _changeListeners_l;
mutable std::mutex _networks_l;
mutable std::shared_mutex _changeListeners_l;
mutable std::shared_mutex _networks_l;
};
} // namespace ZeroTier
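The change set above follows a single locking pattern: read-mostly paths that previously serialized on a std::mutex with std::lock_guard now take a std::shared_lock on a std::shared_mutex, so concurrent readers no longer block one another, while mutating paths take a std::unique_lock for exclusive access. A minimal self-contained sketch of that idiom (class and member names here are illustrative, not ZeroTier's):
#include <cstdint>
#include <map>
#include <shared_mutex>
#include <string>
class NetworkTable {
public:
    // Read path: any number of threads may hold the shared lock at once.
    bool has(uint64_t id) const {
        std::shared_lock<std::shared_mutex> l(_lock);
        return _nets.find(id) != _nets.end();
    }
    // Write path: the unique lock excludes readers and other writers alike.
    void put(uint64_t id, const std::string &cfg) {
        std::unique_lock<std::shared_mutex> l(_lock);
        _nets[id] = cfg;
    }
private:
    std::map<uint64_t, std::string> _nets;
    mutable std::shared_mutex _lock; // mutable: const readers still need to lock
};
Note the mutable qualifier on the lock, mirroring `mutable std::shared_mutex _networks_l` above: const accessors such as `hasNetwork()` still have to acquire it.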


@ -15,9 +15,12 @@
namespace ZeroTier {
DBMirrorSet::DBMirrorSet(DB::ChangeListener *listener) :
_listener(listener),
_running(true)
DBMirrorSet::DBMirrorSet(DB::ChangeListener *listener)
: _listener(listener)
, _running(true)
, _syncCheckerThread()
, _dbs()
, _dbs_l()
{
_syncCheckerThread = std::thread([this]() {
for(;;) {
@ -29,7 +32,7 @@ DBMirrorSet::DBMirrorSet(DB::ChangeListener *listener) :
std::vector< std::shared_ptr<DB> > dbs;
{
std::lock_guard<std::mutex> l(_dbs_l);
std::unique_lock<std::shared_mutex> l(_dbs_l);
if (_dbs.size() <= 1)
continue; // no need to do this if there's only one DB, so skip the iteration
dbs = _dbs;
@ -76,7 +79,7 @@ DBMirrorSet::~DBMirrorSet()
bool DBMirrorSet::hasNetwork(const uint64_t networkId) const
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if ((*d)->hasNetwork(networkId))
return true;
@ -86,7 +89,7 @@ bool DBMirrorSet::hasNetwork(const uint64_t networkId) const
bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if ((*d)->get(networkId,network)) {
return true;
@ -97,7 +100,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network)
bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if ((*d)->get(networkId,network,memberId,member))
return true;
@ -107,7 +110,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uin
bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uint64_t memberId,nlohmann::json &member,DB::NetworkSummaryInfo &info)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if ((*d)->get(networkId,network,memberId,member,info))
return true;
@ -117,7 +120,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,const uin
bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,std::vector<nlohmann::json> &members)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if ((*d)->get(networkId,network,members))
return true;
@ -127,7 +130,7 @@ bool DBMirrorSet::get(const uint64_t networkId,nlohmann::json &network,std::vect
AuthInfo DBMirrorSet::getSSOAuthInfo(const nlohmann::json &member, const std::string &redirectURL)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
AuthInfo info = (*d)->getSSOAuthInfo(member, redirectURL);
if (info.enabled) {
@ -139,7 +142,7 @@ AuthInfo DBMirrorSet::getSSOAuthInfo(const nlohmann::json &member, const std::st
void DBMirrorSet::networks(std::set<uint64_t> &networks)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
(*d)->networks(networks);
}
@ -148,7 +151,7 @@ void DBMirrorSet::networks(std::set<uint64_t> &networks)
bool DBMirrorSet::waitForReady()
{
bool r = false;
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
r |= (*d)->waitForReady();
}
@ -157,7 +160,7 @@ bool DBMirrorSet::waitForReady()
bool DBMirrorSet::isReady()
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if (!(*d)->isReady())
return false;
@ -169,7 +172,7 @@ bool DBMirrorSet::save(nlohmann::json &record,bool notifyListeners)
{
std::vector< std::shared_ptr<DB> > dbs;
{
std::lock_guard<std::mutex> l(_dbs_l);
std::unique_lock<std::shared_mutex> l(_dbs_l);
dbs = _dbs;
}
if (notifyListeners) {
@ -189,7 +192,7 @@ bool DBMirrorSet::save(nlohmann::json &record,bool notifyListeners)
void DBMirrorSet::eraseNetwork(const uint64_t networkId)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::unique_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
(*d)->eraseNetwork(networkId);
}
@ -197,7 +200,7 @@ void DBMirrorSet::eraseNetwork(const uint64_t networkId)
void DBMirrorSet::eraseMember(const uint64_t networkId,const uint64_t memberId)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::unique_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
(*d)->eraseMember(networkId,memberId);
}
@ -205,7 +208,7 @@ void DBMirrorSet::eraseMember(const uint64_t networkId,const uint64_t memberId)
void DBMirrorSet::nodeIsOnline(const uint64_t networkId,const uint64_t memberId,const InetAddress &physicalAddress)
{
std::lock_guard<std::mutex> l(_dbs_l);
std::shared_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
(*d)->nodeIsOnline(networkId,memberId,physicalAddress);
}
@ -214,7 +217,7 @@ void DBMirrorSet::nodeIsOnline(const uint64_t networkId,const uint64_t memberId,
void DBMirrorSet::onNetworkUpdate(const void *db,uint64_t networkId,const nlohmann::json &network)
{
nlohmann::json record(network);
std::lock_guard<std::mutex> l(_dbs_l);
std::unique_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if (d->get() != db) {
(*d)->save(record,false);
@ -226,7 +229,7 @@ void DBMirrorSet::onNetworkUpdate(const void *db,uint64_t networkId,const nlohma
void DBMirrorSet::onNetworkMemberUpdate(const void *db,uint64_t networkId,uint64_t memberId,const nlohmann::json &member)
{
nlohmann::json record(member);
std::lock_guard<std::mutex> l(_dbs_l);
std::unique_lock<std::shared_mutex> l(_dbs_l);
for(auto d=_dbs.begin();d!=_dbs.end();++d) {
if (d->get() != db) {
(*d)->save(record,false);


@ -18,7 +18,7 @@
#include <vector>
#include <memory>
#include <mutex>
#include <shared_mutex>
#include <set>
#include <thread>
@ -56,7 +56,7 @@ public:
inline void addDB(const std::shared_ptr<DB> &db)
{
db->addListener(this);
std::lock_guard<std::mutex> l(_dbs_l);
std::unique_lock<std::shared_mutex> l(_dbs_l);
_dbs.push_back(db);
}
@ -65,7 +65,7 @@ private:
std::atomic_bool _running;
std::thread _syncCheckerThread;
std::vector< std::shared_ptr< DB > > _dbs;
mutable std::mutex _dbs_l;
mutable std::shared_mutex _dbs_l;
};
} // namespace ZeroTier

File diff suppressed because it is too large.


@ -37,6 +37,8 @@
#include <nlohmann/json.hpp>
#include <cpp-httplib/httplib.h>
#include "DB.hpp"
#include "DBMirrorSet.hpp"
@ -66,27 +68,9 @@ public:
const Identity &identity,
const Dictionary<ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY> &metaData);
unsigned int handleControlPlaneHttpGET(
const std::vector<std::string> &path,
const std::map<std::string,std::string> &urlArgs,
const std::map<std::string,std::string> &headers,
const std::string &body,
std::string &responseBody,
std::string &responseContentType);
unsigned int handleControlPlaneHttpPOST(
const std::vector<std::string> &path,
const std::map<std::string,std::string> &urlArgs,
const std::map<std::string,std::string> &headers,
const std::string &body,
std::string &responseBody,
std::string &responseContentType);
unsigned int handleControlPlaneHttpDELETE(
const std::vector<std::string> &path,
const std::map<std::string,std::string> &urlArgs,
const std::map<std::string,std::string> &headers,
const std::string &body,
std::string &responseBody,
std::string &responseContentType);
void configureHTTPControlPlane(
httplib::Server &s,
const std::function<void(const httplib::Request&, httplib::Response&, std::string)>);
void handleRemoteTrace(const ZT_RemoteTrace &rt);
@ -97,6 +81,9 @@ public:
private:
void _request(uint64_t nwid,const InetAddress &fromAddr,uint64_t requestPacketId,const Identity &identity,const Dictionary<ZT_NETWORKCONFIG_METADATA_DICT_CAPACITY> &metaData);
void _startThreads();
void _ssoExpiryThread();
std::string networkUpdateFromPostData(uint64_t networkID, const std::string &body);
struct _RQEntry
{
@ -160,6 +147,34 @@ private:
RedisConfig *_rc;
std::string _ssoRedirectURL;
bool _ssoExpiryRunning;
std::thread _ssoExpiry;
#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
prometheus::simpleapi::benchmark_family_t _member_status_lookup;
prometheus::simpleapi::counter_family_t _member_status_lookup_count;
prometheus::simpleapi::benchmark_family_t _node_is_online;
prometheus::simpleapi::counter_family_t _node_is_online_count;
prometheus::simpleapi::benchmark_family_t _get_and_init_member;
prometheus::simpleapi::counter_family_t _get_and_init_member_count;
prometheus::simpleapi::benchmark_family_t _have_identity;
prometheus::simpleapi::counter_family_t _have_identity_count;
prometheus::simpleapi::benchmark_family_t _determine_auth;
prometheus::simpleapi::counter_family_t _determine_auth_count;
prometheus::simpleapi::benchmark_family_t _sso_check;
prometheus::simpleapi::counter_family_t _sso_check_count;
prometheus::simpleapi::benchmark_family_t _auth_check;
prometheus::simpleapi::counter_family_t _auth_check_count;
prometheus::simpleapi::benchmark_family_t _json_schlep;
prometheus::simpleapi::counter_family_t _json_schlep_count;
prometheus::simpleapi::benchmark_family_t _issue_certificate;
prometheus::simpleapi::counter_family_t _issue_certificate_count;
prometheus::simpleapi::benchmark_family_t _save_member;
prometheus::simpleapi::counter_family_t _save_member_count;
prometheus::simpleapi::benchmark_family_t _send_netconf;
prometheus::simpleapi::counter_family_t _send_netconf_count;
#endif
};
} // namespace ZeroTier


@ -374,6 +374,7 @@ void PostgreSQL::nodeIsOnline(const uint64_t networkId, const uint64_t memberId,
AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::string &redirectURL)
{
Metrics::db_get_sso_info++;
// NONCE is just a random character string. no semantic meaning
// state = HMAC SHA384 of Nonce based on shared sso key
//
@ -388,9 +389,16 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
char authenticationURL[4096] = {0};
AuthInfo info;
info.enabled = true;
//if (memberId == "a10dccea52" && networkId == "8056c2e21c24673d") {
// fprintf(stderr, "invalid authinfo for grant's machine\n");
// info.version=1;
// return info;
//}
// fprintf(stderr, "PostgreSQL::updateMemberOnLoad: %s-%s\n", networkId.c_str(), memberId.c_str());
std::shared_ptr<PostgresConnection> c;
try {
auto c = _pool->borrow();
c = _pool->borrow();
pqxx::work w(*c->c);
char nonceBytes[16] = {0};
@ -452,7 +460,7 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
"LEFT OUTER JOIN ztc_network_oidc_config noc "
" ON noc.network_id = n.id "
"LEFT OUTER JOIN ztc_oidc_config oc "
" ON noc.client_id = oc.client_id AND noc.org_id = o.org_id "
" ON noc.client_id = oc.client_id AND oc.org_id = o.org_id "
"WHERE n.id = $1 AND n.sso_enabled = true", networkId);
std::string client_id = "";
@ -462,11 +470,11 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
uint64_t sso_version = 0;
if (r.size() == 1) {
client_id = r.at(0)[0].as<std::string>();
authorization_endpoint = r.at(0)[1].as<std::string>();
issuer = r.at(0)[2].as<std::string>();
provider = r.at(0)[3].as<std::string>();
sso_version = r.at(0)[4].as<uint64_t>();
client_id = r.at(0)[0].as<std::optional<std::string>>().value_or("");
authorization_endpoint = r.at(0)[1].as<std::optional<std::string>>().value_or("");
issuer = r.at(0)[2].as<std::optional<std::string>>().value_or("");
provider = r.at(0)[3].as<std::optional<std::string>>().value_or("");
sso_version = r.at(0)[4].as<std::optional<uint64_t>>().value_or(1);
} else if (r.size() > 1) {
fprintf(stderr, "ERROR: More than one auth endpoint for an organization?!?!? NetworkID: %s\n", networkId.c_str());
} else {
@ -776,7 +784,7 @@ void PostgreSQL::initializeMembers()
if (_redisMemberStatus) {
fprintf(stderr, "Initialize Redis for members...\n");
std::lock_guard<std::mutex> l(_networks_l);
std::unique_lock<std::shared_mutex> l(_networks_l);
std::unordered_set<std::string> deletes;
for ( auto it : _networks) {
uint64_t nwid_i = it.first;
@ -1040,7 +1048,6 @@ void PostgreSQL::heartbeat()
w.commit();
} catch (std::exception &e) {
fprintf(stderr, "%s: Heartbeat update failed: %s\n", controllerId, e.what());
_pool->unborrow(c);
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
continue;
}
@ -1270,6 +1277,7 @@ void PostgreSQL::commitThread()
continue;
}
Metrics::pgsql_commit_ticks++;
try {
nlohmann::json &config = (qitem.first);
const std::string objtype = config["objtype"];
@ -1596,7 +1604,6 @@ void PostgreSQL::commitThread()
}
_pool->unborrow(c);
c.reset();
std::this_thread::sleep_for(std::chrono::milliseconds(100));
}
fprintf(stderr, "%s commitThread finished\n", _myAddressStr.c_str());
@ -1812,7 +1819,7 @@ uint64_t PostgreSQL::_doRedisUpdate(sw::redis::Transaction &tx, std::string &con
sw::redis::RightBoundedInterval<double>(expireOld,
sw::redis::BoundType::LEFT_OPEN));
{
std::lock_guard<std::mutex> l(_networks_l);
std::shared_lock<std::shared_mutex> l(_networks_l);
for (const auto &it : _networks) {
uint64_t nwid_i = it.first;
char nwidTmp[64];


@ -3,7 +3,7 @@ Network Controller Microservice
Every ZeroTier virtual network has a *network controller* responsible for admitting members to the network, issuing certificates, and issuing default configuration information.
This is our reference controller implementation and is the same one we use to power our own hosted services at [my.zerotier.com](https://my.zerotier.com/). As of ZeroTier One version 1.2.0 this code is included in normal builds for desktop, laptop, and server (Linux, etc.) targets.
This is our reference controller implementation and is almost the same as the one we use to power our own hosted services at [my.zerotier.com](https://my.zerotier.com/). The only difference is the database backend used.
Controller data is stored in JSON format under `controller.d` in the ZeroTier working directory. It can be copied, rsync'd, placed in `git`, etc. The files under `controller.d` should not be modified in place while the controller is running or data loss may result, and if they are edited directly take care not to save corrupt JSON since that can also lead to data loss when the controller is restarted. Going through the API is strongly preferred to directly modifying these files.
@ -19,10 +19,6 @@ Since ZeroTier nodes are mobile and do not need static IPs, implementing high av
ZeroTier network controllers can easily be run in Docker or other container systems. Since containers do not need to actually join networks, extra privilege options like "--device=/dev/net/tun --privileged" are not needed. You'll just need to map the local JSON API port of the running controller and allow it to access the Internet (over UDP/9993 at a minimum) so things can reach and query it.
### PostgreSQL Database Implementation
The default controller stores its data in the filesystem in `controller.d` under ZeroTier's home folder. There's an alternative implementation that stores data in PostgreSQL that can be built with `make central-controller`. Right now this is only guaranteed to build and run on Centos 7 Linux with PostgreSQL 10 installed via the [PostgreSQL Yum Repository](https://www.postgresql.org/download/linux/redhat/) and is designed for use with [ZeroTier Central](https://my.zerotier.com/). You're welcome to use it but we don't "officially" support it for end-user use and it could change at any time.
### Upgrading from Older (1.1.14 or earlier) Versions
Older versions of this code used a SQLite database instead of in-filesystem JSON. A migration utility called `migrate-sqlite` is included here and *must* be used to migrate this data to the new format. If the controller is started with an old `controller.db` in its working directory it will terminate after printing an error to *stderr*. This is done to prevent "surprises" for those running DIY controllers using the old code.
@ -43,210 +39,17 @@ While networks with any valid ID can be added to the controller's database, it w
The controller JSON API is *very* sensitive about types. Integers must be integers and strings strings, etc. Incorrect types may be ignored, set to default values, or set to undefined values.
#### `/controller`
Full documentation of the Controller API can be found on our [documentation site](https://docs.zerotier.com/service/v1#tag/controller)
* Purpose: Check for controller function and return controller status
* Methods: GET
* Returns: { object }
### Prometheus Metrics
| Field | Type | Description | Writable |
| ------------------ | ----------- | ------------------------------------------------- | -------- |
| controller | boolean | Always 'true' | no |
| apiVersion | integer | Controller API version, currently 3 | no |
| clock | integer | Current clock on controller, ms since epoch | no |
#### `/controller/network`
* Purpose: List all networks hosted by this controller
* Methods: GET
* Returns: [ string, ... ]
This returns an array of 16-digit hexadecimal network IDs.
#### `/controller/network/<network ID>`
* Purpose: Create, configure, and delete hosted networks
* Methods: GET, POST, DELETE
* Returns: { object }
By making queries to this path you can create, configure, and delete networks. DELETE is final, so don't do it unless you really mean it.
When POSTing new networks take care that their IDs are not in use, otherwise you may overwrite an existing one. To create a new network with a random unused ID, POST to `/controller/network/##########______`. The #'s are the controller's 10-digit ZeroTier address and they're followed by six underscores. Check the `nwid` field of the returned JSON object for your network's newly allocated ID. Subsequent POSTs to this network must refer to its actual path.
Example:
`curl -X POST --header "X-ZT1-Auth: secret" -d '{"name":"my network"}' http://localhost:9993/controller/network/305f406058______`
**Network object format:**
| Field | Type | Description | Writable |
| --------------------- | ------------- | ------------------------------------------------- | -------- |
| id | string | 16-digit network ID | no |
| nwid | string | 16-digit network ID (legacy) | no |
| objtype | string | Always "network" | no |
| name | string | A short name for this network | YES |
| creationTime | integer | Time network record was created (ms since epoch) | no |
| private | boolean | Is access control enabled? | YES |
| enableBroadcast | boolean | Ethernet ff:ff:ff:ff:ff:ff allowed? | YES |
| v4AssignMode | object | IPv4 management and assign options (see below) | YES |
| v6AssignMode | object | IPv6 management and assign options (see below) | YES |
| mtu | integer | Network MTU (default: 2800) | YES |
| multicastLimit | integer | Maximum recipients for a multicast packet | YES |
| revision | integer | Network config revision counter | no |
| routes | array[object] | Managed IPv4 and IPv6 routes; see below | YES |
| ipAssignmentPools | array[object] | IP auto-assign ranges; see below | YES |
| rules | array[object] | Traffic rules; see below | YES |
| capabilities | array[object] | Array of capability objects (see below) | YES |
| tags | array[object] | Array of tag objects (see below) | YES |
| remoteTraceTarget | string | 10-digit ZeroTier ID of remote trace target | YES |
| remoteTraceLevel | integer | Remote trace verbosity level | YES |
* Networks without rules won't carry any traffic. If you don't specify any on network creation an "accept anything" rule set will automatically be added.
* Managed IP address assignments and IP assignment pools that do not fall within a route configured in `routes` are ignored and won't be used or sent to members.
* The default for `private` is `true` and this is probably what you want. Turning `private` off means *anyone* can join your network with only its 16-digit network ID. It's also impossible to de-authorize a member as these networks don't issue or enforce certificates. Such "party line" networks are used for decentralized app backplanes, gaming, and testing but are otherwise not common.
* Changing the MTU can be disruptive and on some operating systems may require a leave/rejoin of the network or a restart of the ZeroTier service.
**Auto-Assign Modes:**
Auto assign modes (`v4AssignMode` and `v6AssignMode`) contain objects that map assignment modes to booleans.
For IPv4 the only valid setting is `zt` which, if true, causes IPv4 addresses to be auto-assigned from `ipAssignmentPools` to members that do not have an IPv4 assignment. Note that active bridges are exempt and will not get auto-assigned IPs since this can interfere with bridging. (You can still manually assign one if you want.)
IPv6 includes this option and two others: `6plane` and `rfc4193`. These assign private IPv6 addresses to each member based on a deterministic assignment scheme that allows members to emulate IPv6 NDP to skip multicast for better performance and scalability. The `rfc4193` mode gives every member a /128 on a /88 network, while `6plane` gives every member a /80 within a /40 network but uses NDP emulation to route *all* IPs under that /80 to its owner. The `6plane` mode is great for use cases like Docker since it allows every member to assign IPv6 addresses within its /80 that just work instantly and globally across the network.
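For concreteness, here is a small sketch of how these deterministic per-member addresses can be computed from the 64-bit network ID and the 40-bit node address. The byte layout follows the 6PLANE and RFC4193 schemes summarized above, and the sample IDs are ones that appear elsewhere in this diff; treat it as an illustration, not as the production implementation:
#include <cstdint>
#include <cstdio>
static void make6plane(uint64_t nwid, uint64_t nodeId, uint8_t ip[16]) {
    for (int i = 0; i < 16; ++i) ip[i] = 0;
    // XOR-fold the 64-bit network ID down to 32 bits
    const uint32_t folded = (uint32_t)(nwid >> 32) ^ (uint32_t)nwid;
    ip[0] = 0xfc; // fc + folded nwid = the /40 network
    for (int i = 0; i < 4; ++i) ip[1 + i] = (uint8_t)(folded >> (24 - 8 * i));
    for (int i = 0; i < 5; ++i) ip[5 + i] = (uint8_t)(nodeId >> (32 - 8 * i)); // /80 per member
    ip[15] = 0x01; // conventional host part within the member's /80
}
static void makeRfc4193(uint64_t nwid, uint64_t nodeId, uint8_t ip[16]) {
    ip[0] = 0xfd; // fd + full nwid + 99:93 = the /88 network
    for (int i = 0; i < 8; ++i) ip[1 + i] = (uint8_t)(nwid >> (56 - 8 * i));
    ip[9] = 0x99; ip[10] = 0x93;
    for (int i = 0; i < 5; ++i) ip[11 + i] = (uint8_t)(nodeId >> (32 - 8 * i)); // /128 per member
}
static void print16(const uint8_t ip[16]) {
    for (int i = 0; i < 16; i += 2) printf("%02x%02x%c", ip[i], ip[i + 1], (i < 14) ? ':' : '\n');
}
int main() {
    uint8_t a[16];
    make6plane(0x8056c2e21c000001ULL, 0xa10dccea52ULL, a); print16(a);
    makeRfc4193(0x8056c2e21c000001ULL, 0xa10dccea52ULL, a); print16(a);
    return 0;
}
Because the mapping is a pure function of the two IDs, every member can compute every other member's address locally, which is what allows NDP to be emulated without multicast.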
**IP assignment pool object format:**
| Field | Type | Description |
| --------------------- | ------------- | ------------------------------------------------- |
| ipRangeStart | string | Starting IP address in range |
| ipRangeEnd | string | Ending IP address in range (inclusive) |
Pools are only used if auto-assignment is on for the given address type (IPv4 or IPv6) and if the entire range falls within a managed route.
IPv6 ranges work just like IPv4 ranges and look like this:
{
"ipRangeStart": "fd00:feed:feed:beef:0000:0000:0000:0000",
"ipRangeEnd": "fd00:feed:feed:beef:ffff:ffff:ffff:ffff"
}
(You can POST a shortened-form IPv6 address but the API will always report back un-shortened canonical form addresses.)
That defines a range within network `fd00:feed:feed:beef::/64` that contains up to 2^64 addresses. If an IPv6 range is large enough, the controller will assign addresses by placing each member's device ID into the address in a manner similar to the RFC4193 and 6PLANE modes. Otherwise it will assign addresses at random.
**Managed Route object format:**
| Field | Type | Description |
| --------------------- | ------------- | ------------------------------------------------- |
| target | string | Subnet in CIDR notation |
| via | string/null | Next hop router IP address |
Managed Route objects look like this:
{
"target": "10.147.20.0/24"
}
or
{
"target": "192.168.168.0/24",
"via": "10.147.20.1"
}
**Rule object format:**
Each rule is actually a sequence of zero or more `MATCH_` entries in the rule array followed by an `ACTION_` entry that describes what to do if all the preceding entries match. An `ACTION_` without any preceding `MATCH_` entries is always taken, so setting a single `ACTION_ACCEPT` rule yields a network that allows all traffic. If no rules are present the default action is `ACTION_DROP`.
Rules are evaluated in the order in which they appear in the array. There is currently a limit of 256 entries per network. Capabilities should be used if a larger and more complex rule set is needed since they allow rules to be grouped by purpose and only shipped to members that need them.
Each rule table entry has two common fields.
| Field | Type | Description |
| --------------------- | ------------- | ------------------------------------------------- |
| type | string | Entry type (all caps, case sensitive) |
| not | boolean | If true, MATCHes match if they don't match |
The following fields may or may not be present depending on rule type:
| Field | Type | Description |
| --------------------- | ------------- | ------------------------------------------------- |
| zt | string | 10-digit hex ZeroTier address |
| etherType | integer | Ethernet frame type |
| mac | string | Hex MAC address (with or without :'s) |
| ip | string | IPv4 or IPv6 address |
| ipTos | integer | IP type of service |
| ipProtocol | integer | IP protocol (e.g. TCP) |
| start | integer | Start of an integer range (e.g. port range) |
| end | integer | End of an integer range (inclusive) |
| id | integer | Tag ID |
| value | integer | Tag value or comparison value |
| mask | integer | Bit mask (for characteristics flags) |
The entry types and their additional fields are:
| Entry type | Description | Fields |
| ------------------------------- | ----------------------------------------------------------------- | -------------- |
| `ACTION_DROP` | Drop any packets matching this rule | (none) |
| `ACTION_ACCEPT` | Accept any packets matching this rule | (none) |
| `ACTION_TEE` | Send a copy of this packet to a node (rule parsing continues) | `zt` |
| `ACTION_REDIRECT` | Redirect this packet to another node | `zt` |
| `ACTION_DEBUG_LOG` | Output debug info on match (if built with rules engine debug) | (none) |
| `MATCH_SOURCE_ZEROTIER_ADDRESS` | Match VL1 ZeroTier address of packet sender. | `zt` |
| `MATCH_DEST_ZEROTIER_ADDRESS` | Match VL1 ZeroTier address of recipient | `zt` |
| `MATCH_ETHERTYPE` | Match Ethernet frame type | `etherType` |
| `MATCH_MAC_SOURCE` | Match source Ethernet MAC address | `mac` |
| `MATCH_MAC_DEST` | Match destination Ethernet MAC address | `mac` |
| `MATCH_IPV4_SOURCE` | Match source IPv4 address | `ip` |
| `MATCH_IPV4_DEST` | Match destination IPv4 address | `ip` |
| `MATCH_IPV6_SOURCE` | Match source IPv6 address | `ip` |
| `MATCH_IPV6_DEST` | Match destination IPv6 address | `ip` |
| `MATCH_IP_TOS` | Match IP TOS field | `ipTos` |
| `MATCH_IP_PROTOCOL` | Match IP protocol field | `ipProtocol` |
| `MATCH_IP_SOURCE_PORT_RANGE` | Match a source IP port range | `start`,`end` |
| `MATCH_IP_DEST_PORT_RANGE` | Match a destination IP port range | `start`,`end` |
| `MATCH_CHARACTERISTICS` | Match on characteristics flags | `mask`,`value` |
| `MATCH_FRAME_SIZE_RANGE` | Match a range of Ethernet frame sizes | `start`,`end` |
| `MATCH_TAGS_SAMENESS` | Match if both sides' tags differ by no more than value | `id`,`value` |
| `MATCH_TAGS_BITWISE_AND` | Match if both sides' tags AND to value | `id`,`value` |
| `MATCH_TAGS_BITWISE_OR` | Match if both sides' tags OR to value | `id`,`value` |
| `MATCH_TAGS_BITWISE_XOR` | Match if both sides' tags XOR to value | `id`,`value` |
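As an illustration of how MATCH_/ACTION_ sequences compose, the following hypothetical rule set drops any frame that is not IPv4 (0x0800), ARP (0x0806), or IPv6 (0x86dd) and accepts everything else. The three negated matches only all succeed when the frame is none of those types, triggering the drop; the trailing bare ACTION_ACCEPT then accepts whatever remains:
[
  { "type": "MATCH_ETHERTYPE", "not": true, "etherType": 2048 },
  { "type": "MATCH_ETHERTYPE", "not": true, "etherType": 2054 },
  { "type": "MATCH_ETHERTYPE", "not": true, "etherType": 34525 },
  { "type": "ACTION_DROP" },
  { "type": "ACTION_ACCEPT" }
]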
Important notes about rules engine behavior:
* IPv4 and IPv6 IP address rules do not match for frames that are not IPv4 or IPv6 respectively.
* `ACTION_DEBUG_LOG` is a no-op on nodes not built with `ZT_RULES_ENGINE_DEBUGGING` enabled (see Network.cpp). If that is enabled nodes will dump a trace of rule evaluation results to *stdout* when this action is encountered but will otherwise keep evaluating rules. This is used for basic "smoke testing" of the rules engine.
* Multicast packets and packets destined for bridged devices are treated a little differently. They are matched more than once. They are matched at the point of send with a NULL ZeroTier destination address, meaning that `MATCH_DEST_ZEROTIER_ADDRESS` is useless. That's because the true VL1 destination is not yet known. Then they are matched again for each true VL1 destination. On these later subsequent matches TEE actions are ignored and REDIRECT rules are interpreted as DROPs. This prevents multiple TEE or REDIRECT packets from being sent to third party devices.
* Rules in capabilities are always matched as if the current device is the sender (inbound == false). A capability specifies sender side rules that can be enforced on both sides.
#### `/controller/network/<network ID>/member`
* Purpose: Get a set of all members on this network
* Methods: GET
* Returns: { object }
This returns a JSON object containing all member IDs as keys and their `memberRevisionCounter` values as values.
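For example, a network with two members might return something like this (hypothetical member IDs and revision counters):
{
  "a10dccea52": 17,
  "b2a3c4d5e6": 4
}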
#### `/controller/network/<network ID>/member/<address>`
* Purpose: Create, authorize, or remove a network member
* Methods: GET, POST, DELETE
* Returns: { object }
| Field | Type | Description | Writable |
| --------------------- | ------------- | ------------------------------------------------- | -------- |
| id | string | Member's 10-digit ZeroTier address | no |
| address | string | Member's 10-digit ZeroTier address | no |
| nwid | string | 16-digit network ID | no |
| authorized | boolean | Is member authorized? (for private networks) | YES |
| activeBridge | boolean | Member is able to bridge to other Ethernet nets | YES |
| identity | string | Member's public ZeroTier identity (if known) | no |
| ipAssignments | array[string] | Managed IP address assignments | YES |
| revision | integer | Member revision counter | no |
| vMajor | integer | Most recently known major version | no |
| vMinor | integer | Most recently known minor version | no |
| vRev | integer | Most recently known revision | no |
| vProto | integer | Most recently known protocol version | no |
Note that managed IP assignments are only used if they fall within a managed route. Otherwise they are ignored.
Controller-specific metrics are available from the `/metrics` endpoint.
| Metric Name | Type | Description |
| --- | --- | --- |
| controller_network_count | Gauge | number of networks the controller is serving |
| controller_member_count | Gauge | number of network members the controller is serving |
| controller_network_change_count | Counter | number of times a network configuration is changed |
| controller_member_change_count | Counter | number of times a network member configuration is changed |
| controller_member_auth_count | Counter | number of network member auths |
| controller_member_deauth_count | Counter | number of network member deauths |


@ -1,10 +1,10 @@
# Dockerfile for ZeroTier Central Controllers
FROM registry.zerotier.com/zerotier/controller-builder:latest as builder
FROM registry.zerotier.com/zerotier/ctlbuild:latest as builder
MAINTAINER Adam Ierymenko <adam.ierymenko@zerotier.com>, Grant Limberg <grant.limberg@zerotier.com>
ADD . /ZeroTierOne
RUN export PATH=$PATH:~/.cargo/bin && cd ZeroTierOne && make clean && make central-controller -j8
FROM registry.zerotier.com/zerotier/controller-run:latest
FROM registry.zerotier.com/zerotier/ctlrun:latest
COPY --from=builder /ZeroTierOne/zerotier-one /usr/local/bin/zerotier-one
RUN chmod a+x /usr/local/bin/zerotier-one
RUN echo "/usr/local/lib64" > /etc/ld.so.conf.d/usr-local-lib64.conf && ldconfig


@ -0,0 +1,16 @@
registry = registry.zerotier.com/zerotier
all: controller-builder controller-runbase
buildx:
@echo "docker buildx create"
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
docker run --privileged --rm tonistiigi/binfmt --install all
@echo docker buildx create --name multiarch --driver docker-container --use
@echo docker buildx inspect --bootstrap
controller-builder: buildx
docker buildx build --no-cache --platform linux/amd64,linux/arm64 -t $(registry)/ctlbuild:latest -f Dockerfile.builder . --push
controller-runbase: buildx
docker buildx build --no-cache --platform linux/amd64,linux/arm64 -t $(registry)/ctlrun:latest -f Dockerfile.run_base . --push


@ -64,6 +64,7 @@ fi
popd
DEFAULT_PORT=9993
DEFAULT_LB_MODE=false
APP_NAME="controller-$(cat /var/lib/zerotier-one/identity.public | cut -d ':' -f 1)"
@ -76,6 +77,7 @@ echo "{
\"inot\",
\"nat64\"
],
\"lowBandwidthMode\": ${ZT_LB_MODE:-$DEFAULT_LB_MODE},
\"ssoRedirectURL\": \"${ZT_SSO_REDIRECT_URL}\",
\"allowManagementFrom\": [\"127.0.0.1\", \"::1\", \"10.0.0.0/8\"],
${REDIS}

File diff suppressed because it is too large.

Binary file not shown.

Some files were not shown because too many files have changed in this diff.