Merge branch 'dev' into brenton/thread-safety-fixes

This commit is contained in:
Joseph Henry 2023-08-02 08:51:49 -07:00 committed by GitHub
commit 426514652f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
354 changed files with 32506 additions and 8858 deletions

View file

@ -38,6 +38,11 @@ local less_targets = [
{ "os": "linux", distro: "ubuntu", "name": "focal", "isas": [ "armv7", "amd64", "arm64" ], "events": [ "push", "tag", "custom" ] },
];
local native_targets = [
{ "os": "linux", distro: "debian", "name": "bullseye", "isas": [ "386", "armv7", "amd64", "arm64" ], "events": [ "push", "tag", "custom" ] },
];
local master_targets = [
//
// copypasta from here
@ -239,13 +244,13 @@ std.flattenArrays([
[
Index(p)
]
for p in targets
for p in native_targets
]) +
std.flattenArrays([
[
Test(p.os, p.distro, p.name, isa, p.events)
for isa in p.isas
]
for p in targets
for p in native_targets
])

8277
.drone.yml

File diff suppressed because it is too large Load diff

View file

@ -13,3 +13,9 @@ echo -e "\nBytes of memory definitely lost: $DEFINITELY_LOST"
if [[ "$DEFINITELY_LOST" -gt 0 ]]; then
exit 1
fi
EXIT_TEST_FAILED=$(cat *test-results/*summary.json | jq .exit_test_failed)
if [[ "$EXIT_TEST_FAILED" -gt 0 ]]; then
exit 1
fi

View file

@ -3,12 +3,14 @@
# This test script joins Earth and pokes some stuff
TEST_NETWORK=8056c2e21c000001
RUN_LENGTH=10
RUN_LENGTH=60
TEST_FINISHED=false
ZTO_VER=$(git describe --tags $(git rev-list --tags --max-count=1))
ZTO_COMMIT=$(git rev-parse HEAD)
ZTO_COMMIT_SHORT=$(git rev-parse --short HEAD)
TEST_DIR_PREFIX="$ZTO_VER-$ZTO_COMMIT_SHORT-test-results"
EXIT_TEST_FAILED=0
echo "Performing test on: $ZTO_VER-$ZTO_COMMIT_SHORT"
TEST_FILEPATH_PREFIX="$TEST_DIR_PREFIX/$ZTO_COMMIT_SHORT"
mkdir $TEST_DIR_PREFIX
@ -16,15 +18,19 @@ mkdir $TEST_DIR_PREFIX
################################################################################
# Multi-node connectivity and performance test #
################################################################################
main() {
echo -e "\nRunning test for $RUN_LENGTH seconds"
check_exit_on_invalid_identity
NS1="ip netns exec ns1"
NS2="ip netns exec ns2"
ZT1="$NS1 ./zerotier-cli -D$(pwd)/node1"
ZT1="$NS1 ./zerotier-cli -p9996 -D$(pwd)/node1"
# Specify custom port on one node to ensure that feature works
ZT2="$NS2 ./zerotier-cli -p9997 -D$(pwd)/node2"
echo -e "Setting up network namespaces..."
echo -e "\nSetting up network namespaces..."
echo "Setting up ns1"
ip netns add ns1
@ -93,7 +99,7 @@ FILENAME_MEMORY_LOG="$TEST_FILEPATH_PREFIX-memory.log"
echo -e "\nStarting a ZeroTier instance in each namespace..."
time_test_start=`date +%s`
time_test_start=$(date +%s)
# Spam the CLI as ZeroTier is starting
spam_cli 100
@ -104,7 +110,7 @@ $NS1 sudo valgrind --demangle=yes --exit-on-first-error=yes \
--xml=yes \
--xml-file=$FILENAME_MEMORY_LOG \
--leak-check=full \
./zerotier-one node1 -U >>node_1.log 2>&1 &
./zerotier-one node1 -p9996 -U >>node_1.log 2>&1 &
# Second instance, not run in memory profiler
$NS2 sudo ./zerotier-one node2 -U -p9997 >>node_2.log 2>&1 &
@ -113,62 +119,25 @@ $NS2 sudo ./zerotier-one node2 -U -p9997 >>node_2.log 2>&1 &
# Online Check #
################################################################################
# Rapidly spam the ZeroTier CLI with status queries, joins and leaves.
#
# Arguments:
#   $1 - upper bound of the status-poll counter (default: 10). The counter
#        runs from 0 through this value inclusive.
# Globals read:
#   ZT1, ZT2      - CLI command prefixes; expanded unquoted on purpose so the
#                   embedded arguments are word-split into a command line.
#   TEST_NETWORK  - network ID passed to join/leave.
# Globals written:
#   MAX_TRIES, SPAM_TRIES, s
spam_cli()
{
    echo "Spamming CLI..."
    # Rapidly spam the CLI with joins/leaves

    # "${$1:-10}" is not valid parameter expansion (bash aborts with
    # "bad substitution"); the first positional parameter is spelled ${1}.
    MAX_TRIES="${1:-10}"

    for ((s=0; s<=MAX_TRIES; s++))
    do
        $ZT1 status
        $ZT2 status
        sleep 0.1
    done

    SPAM_TRIES=128

    # Burst of joins
    for ((s=0; s<=SPAM_TRIES; s++))
    do
        $ZT1 join "$TEST_NETWORK"
    done

    # Burst of leaves
    for ((s=0; s<=SPAM_TRIES; s++))
    do
        $ZT1 leave "$TEST_NETWORK"
    done

    # Alternating leave/join
    for ((s=0; s<=SPAM_TRIES; s++))
    do
        $ZT1 leave "$TEST_NETWORK"
        $ZT1 join "$TEST_NETWORK"
    done
}
echo "Waiting for ZeroTier to come online before attempting test..."
MAX_WAIT_SECS="${MAX_WAIT_SECS:-120}"
node1_online=false
node2_online=false
both_instances_online=false
time_zt_node1_start=`date +%s`
time_zt_node2_start=`date +%s`
time_zt_node1_start=$(date +%s)
time_zt_node2_start=$(date +%s)
for ((s=0; s<=MAX_WAIT_SECS; s++))
do
for ((s = 0; s <= MAX_WAIT_SECS; s++)); do
node1_online="$($ZT1 -j info | jq '.online' 2>/dev/null)"
node2_online="$($ZT2 -j info | jq '.online' 2>/dev/null)"
echo "Checking for online status: try #$s, node1:$node1_online, node2:$node2_online"
if [[ "$node1_online" == "true" ]]
then
time_zt_node1_online=`date +%s`
if [[ "$node1_online" == "true" ]]; then
time_zt_node1_online=$(date +%s)
fi
if [[ "$node2_online" == "true" ]]
then
time_zt_node2_online=`date +%s`
if [[ "$node2_online" == "true" ]]; then
time_zt_node2_online=$(date +%s)
fi
if [[ "$node2_online" == "true" && "$node1_online" == "true" ]]
then
if [[ "$node2_online" == "true" && "$node1_online" == "true" ]]; then
both_instances_online=true
break
fi
@ -195,8 +164,7 @@ $ZT1 status
echo -e "\n\nNode 2:"
$ZT2 status
if [[ "$both_instances_online" != "true" ]]
then
if [[ "$both_instances_online" != "true" ]]; then
echo "One or more instances of ZeroTier failed to come online. Aborting test."
exit 1
fi
@ -226,9 +194,9 @@ $NS2 ping -c 16 $node1_ip4 > $PING21_FILENAME
ping_loss_percent_1_to_2="${ping_loss_percent_1_to_2:-100.0}"
ping_loss_percent_2_to_1="${ping_loss_percent_2_to_1:-100.0}"
ping_loss_percent_1_to_2=$(cat $PING12_FILENAME | \
ping_loss_percent_1_to_2=$(cat $PING12_FILENAME |
grep "packet loss" | awk '{print $6}' | sed 's/%//')
ping_loss_percent_2_to_1=$(cat $PING21_FILENAME | \
ping_loss_percent_2_to_1=$(cat $PING21_FILENAME |
grep "packet loss" | awk '{print $6}' | sed 's/%//')
# Normalize loss value
@ -341,7 +309,7 @@ echo -e "\nStopping memory check..."
sudo pkill -15 -f valgrind
sleep 10
time_test_end=`date +%s`
time_test_end=$(date +%s)
################################################################################
# Rename ZeroTier stdout/stderr logs #
@ -403,7 +371,8 @@ time_length_zt_node2_online=$((time_zt_node2_online-time_zt_start))
#time_length_zt_leave=$((time_zt_leave_end-time_zt_leave_start))
#time_length_zt_can_still_ping=$((time_zt_can_still_ping-time_zt_leave_start))
summary=$(cat <<EOF
summary=$(
cat <<EOF
{
"version":"$ZTO_VER",
"commit":"$ZTO_COMMIT",
@ -426,12 +395,70 @@ summary=$(cat <<EOF
"mean_latency_ping_netns": $POSSIBLY_LOST,
"mean_pdv_random": $POSSIBLY_LOST,
"mean_pdv_netns": $POSSIBLY_LOST,
"mean_perf_netns": $POSSIBLY_LOST
"mean_perf_netns": $POSSIBLY_LOST,
"exit_test_failed": $EXIT_TEST_FAILED
}
EOF
)
echo $summary >$FILENAME_SUMMARY
cat $FILENAME_SUMMARY
}
"$@"
################################################################################
# CLI Check #
################################################################################
# Rapidly exercise the CLI: status polls first, then join/leave bursts.
#   $1 - inclusive upper bound for the status-poll counter (10 if omitted)
# Uses $ZT1 / $ZT2 (CLI command prefixes, deliberately left unquoted so they
# word-split into a command line) and $TEST_NETWORK from the enclosing script.
spam_cli() {
    echo "Spamming CLI..."

    # Status polling on both nodes, pausing briefly between rounds
    MAX_TRIES="${1:-10}"
    s=0
    while ((s <= MAX_TRIES)); do
        $ZT1 status
        $ZT2 status
        sleep 0.1
        s=$((s + 1))
    done

    SPAM_TRIES=128

    # Repeated joins
    s=0
    while ((s <= SPAM_TRIES)); do
        $ZT1 join $TEST_NETWORK
        s=$((s + 1))
    done

    # Repeated leaves
    s=0
    while ((s <= SPAM_TRIES)); do
        $ZT1 leave $TEST_NETWORK
        s=$((s + 1))
    done

    # Leave immediately followed by join
    s=0
    while ((s <= SPAM_TRIES)); do
        $ZT1 leave $TEST_NETWORK
        $ZT1 join $TEST_NETWORK
        s=$((s + 1))
    done
}
# Check that zerotier-one does not keep running when given an invalid identity.
#
# Creates ./exit_test under the current directory, writes placeholder text
# into identity.secret and authtoken.secret, launches the daemon on port 9999
# in the background, waits five seconds and then probes whether the
# backgrounded process still exists.
#
# Globals written:
#   EXIT_TEST_FAILED - set to 1 when the process is still running
#   my_pid           - PID of the backgrounded launch command
check_exit_on_invalid_identity() {
    echo "Checking ZeroTier exits on invalid identity..."

    local exit_test_dir
    exit_test_dir="$(pwd)/exit_test"
    mkdir -p "$exit_test_dir"

    # Hold the launch command in a function-local array instead of assigning
    # it to ZT1: ZT1 is the CLI wrapper used by the rest of the script and
    # must not be clobbered here. An array also keeps a working directory
    # containing spaces intact as a single argument.
    local -a launch_cmd=(sudo ./zerotier-one -p9999 "$exit_test_dir")

    echo "asdfasdfasdfasdf" >"$exit_test_dir/identity.secret"
    echo "asdfasdfasdfasdf" >"$exit_test_dir/authtoken.secret"

    echo "Launch ZeroTier with an invalid identity"
    "${launch_cmd[@]}" &
    my_pid=$!

    echo "Waiting 5 seconds"
    sleep 5

    # Signal 0 delivers nothing; kill's exit status only reports whether the
    # PID can still be signalled, i.e. whether the process is still around.
    if kill -0 "$my_pid"; then
        EXIT_TEST_FAILED=1
        echo "Exit test FAILED: Process still running after being fed an invalid identity"
    else
        echo "Exit test PASSED"
    fi
}
main "$@"

View file

@ -37,7 +37,6 @@ The base path contains the ZeroTier One service main entry point (`one.cpp`), se
- `ext/`: third party libraries, binaries that we ship for convenience on some platforms (Mac and Windows), and installation support files.
- `include/`: include files for the ZeroTier core.
- `java/`: a JNI wrapper used with our Android mobile app. (The whole Android app is not open source but may be made so in the future.)
- `macui/`: a Macintosh menu-bar app for controlling ZeroTier One, written in Objective C.
- `node/`: the ZeroTier virtual Ethernet switch core, which is designed to be entirely separate from the rest of the code and able to be built as a stand-alone OS-independent library. Note to developers: do not use C++11 features in here, since we want this to build on old embedded platforms that lack C++11 support. C++11 can be used elsewhere.
- `osdep/`: code to support and integrate with OSes, including platform-specific stuff only built for certain targets.
- `rule-compiler/`: JavaScript rules language compiler for defining network-level rules.
@ -113,18 +112,18 @@ Additional help can be found in our [knowledge base](https://zerotier.atlassian.
### Prometheus Metrics
Prometheus Metrics are available at the `/metrics` API endpoint. This endpoint is protected by an API key stored in `authtoken.secret` because of the possibility of information leakage. Information that could be gleaned from the metrics includes joined networks and peers your instance is talking to.
Prometheus Metrics are available at the `/metrics` API endpoint. This endpoint is protected by an API key stored in `metricstoken.secret` to prevent unwanted information leakage. Information that could be gleaned from the metrics includes joined networks and peers your instance is talking to.
Access control is via the ZeroTier control interface itself and `authtoken.secret`. This can be sent as the `X-ZT1-Auth` HTTP header field or appended to the URL as `?auth=<token>`. You can see the current metrics via `cURL` with the following command:
Access control is via the ZeroTier control interface itself and `metricstoken.secret`. This can be sent as a bearer auth token, via the `X-ZT1-Auth` HTTP header field, or appended to the URL as `?auth=<token>`. You can see the current metrics via `cURL` with the following command:
// Linux
curl -H "X-ZT1-Auth: $(sudo cat /var/lib/zerotier-one/authtoken.secret)" http://localhost:9993/metrics
curl -H "X-ZT1-Auth: $(sudo cat /var/lib/zerotier-one/metricstoken.secret)" http://localhost:9993/metrics
// macOS
curl -H "X-ZT1-Auth: $(sudo cat /Library/Application\ Support/ZeroTier/One/authtoken.secret)" http://localhost:9993/metrics
curl -H "X-ZT1-Auth: $(sudo cat /Library/Application\ Support/ZeroTier/One/metricstoken.secret)" http://localhost:9993/metrics
// Windows PowerShell (Admin)
Invoke-RestMethod -Headers @{'X-ZT1-Auth' = "$(Get-Content C:\ProgramData\ZeroTier\One\authtoken.secret)"; } -Uri http://localhost:9993/metrics
Invoke-RestMethod -Headers @{'X-ZT1-Auth' = "$(Get-Content C:\ProgramData\ZeroTier\One\metricstoken.secret)"; } -Uri http://localhost:9993/metrics
To configure a scrape job in Prometheus on the machine ZeroTier is running on, add this to your Prometheus `scrape_config`:
@ -137,24 +136,23 @@ To configure a scrape job in Prometheus on the machine ZeroTier is running on, a
- 127.0.0.1:9993
labels:
group: zerotier-one
params:
auth:
- $YOUR_AUTHTOKEN_SECRET
If your Prometheus instance is remote from the machine running the ZeroTier instance, you'll have to edit your `local.conf` file to allow remote access to the API control port. If your local LAN is `10.0.0.0/24`, edit your `local.conf` as follows:
{
"settings": {
"allowManagementFrom": ["10.0.0.0/24"]
}
}
Substitute your actual network IP ranges as necessary.
It's also possible to access the metrics & control port over the ZeroTier network itself via the same method shown above. Just add the address range of your ZeroTier network to the list. NOTE: Using this method means that anyone with your auth token can control your ZeroTier instance, including leaving & joining other networks.
node_id: $YOUR_10_CHARACTER_NODE_ID
authorization:
credentials: $YOUR_METRICS_TOKEN_SECRET
If neither of these methods is desirable, it is probably possible to distribute metrics via [Prometheus Proxy](https://github.com/pambrose/prometheus-proxy) or some other tool. Note: We have not tested this internally, but it will probably work with the correct configuration.
Metrics are also available on disk in ZeroTier's working directory:
// Linux
/var/lib/zerotier-one/metrics.prom
// macOS
/Library/Application\ Support/ZeroTier/One/metrics.prom
// Windows
C:\ProgramData\ZeroTier\One\metrics.prom
#### Available Metrics
| Metric Name | Labels | Metric Type | Description |

View file

@ -1,3 +1,3 @@
#!/bin/bash
c++ -std=c++11 -I../.. -I.. -g -o mkworld ../../node/C25519.cpp ../../node/Salsa20.cpp ../../node/SHA512.cpp ../../node/Identity.cpp ../../node/Utils.cpp ../../node/InetAddress.cpp ../../osdep/OSUtils.cpp mkworld.cpp -lm
c++ -std=c++11 -I../.. -I../../ext -I.. -g -o mkworld ../../node/C25519.cpp ../../node/Salsa20.cpp ../../node/SHA512.cpp ../../node/Identity.cpp ../../node/Utils.cpp ../../node/InetAddress.cpp ../../osdep/OSUtils.cpp mkworld.cpp -lm

View file

@ -466,11 +466,21 @@ EmbeddedNetworkController::EmbeddedNetworkController(Node *node,const char *ztPa
, _node(node)
, _ztPath(ztPath)
, _path(dbPath)
, _signingId()
, _signingIdAddressString()
, _sender((NetworkController::Sender *)0)
, _db(this)
, _queue()
, _threads()
, _threads_l()
, _memberStatus()
, _memberStatus_l()
, _expiringSoon()
, _expiringSoon_l()
, _rc(rc)
, _ssoExpiryRunning(true)
, _ssoExpiry(std::thread(&EmbeddedNetworkController::_ssoExpiryThread, this))
, _rc(rc)
#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
, _member_status_lookup{"nc_member_status_lookup",""}
, _member_status_lookup_count{"nc_member_status_lookup_count",""}

View file

@ -139,9 +139,6 @@ private:
std::vector<std::thread> _threads;
std::mutex _threads_l;
bool _ssoExpiryRunning;
std::thread _ssoExpiry;
std::unordered_map< _MemberStatusKey,_MemberStatus,_MemberStatusHash > _memberStatus;
std::mutex _memberStatus_l;
@ -151,6 +148,9 @@ private:
RedisConfig *_rc;
std::string _ssoRedirectURL;
bool _ssoExpiryRunning;
std::thread _ssoExpiry;
#ifdef CENTRAL_CONTROLLER_REQUEST_BENCHMARK
prometheus::simpleapi::benchmark_family_t _member_status_lookup;
prometheus::simpleapi::counter_family_t _member_status_lookup_count;

View file

@ -460,7 +460,7 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
"LEFT OUTER JOIN ztc_network_oidc_config noc "
" ON noc.network_id = n.id "
"LEFT OUTER JOIN ztc_oidc_config oc "
" ON noc.client_id = oc.client_id AND noc.org_id = o.org_id "
" ON noc.client_id = oc.client_id AND oc.org_id = o.org_id "
"WHERE n.id = $1 AND n.sso_enabled = true", networkId);
std::string client_id = "";
@ -527,9 +527,6 @@ AuthInfo PostgreSQL::getSSOAuthInfo(const nlohmann::json &member, const std::str
_pool->unborrow(c);
} catch (std::exception &e) {
if (c) {
_pool->unborrow(c);
}
fprintf(stderr, "ERROR: Error updating member on load for network %s: %s\n", networkId.c_str(), e.what());
}
@ -1051,7 +1048,6 @@ void PostgreSQL::heartbeat()
w.commit();
} catch (std::exception &e) {
fprintf(stderr, "%s: Heartbeat update failed: %s\n", controllerId, e.what());
_pool->unborrow(c);
std::this_thread::sleep_for(std::chrono::milliseconds(1000));
continue;
}

View file

@ -107,7 +107,7 @@ log_params "Writing healthcheck for networks:" $@
cat >/healthcheck.sh <<EOF
#!/bin/bash
for i in $@
for i in $@ $ZEROTIER_JOIN_NETWORKS
do
[ "\$(zerotier-cli get \$i status)" = "OK" ] || exit 1
done

View file

@ -1,10 +1,10 @@
# Dockerfile for ZeroTier Central Controllers
FROM registry.zerotier.com/zerotier/controller-builder:latest as builder
FROM registry.zerotier.com/zerotier/ctlbuild:latest as builder
MAINTAINER Adam Ierymekno <adam.ierymenko@zerotier.com>, Grant Limberg <grant.limberg@zerotier.com>
ADD . /ZeroTierOne
RUN export PATH=$PATH:~/.cargo/bin && cd ZeroTierOne && make clean && make central-controller -j8
FROM registry.zerotier.com/zerotier/controller-run:latest
FROM registry.zerotier.com/zerotier/ctlrun:latest
COPY --from=builder /ZeroTierOne/zerotier-one /usr/local/bin/zerotier-one
RUN chmod a+x /usr/local/bin/zerotier-one
RUN echo "/usr/local/lib64" > /etc/ld.so.conf.d/usr-local-lib64.conf && ldconfig

View file

@ -0,0 +1,16 @@
# Builds and pushes the multi-arch (linux/amd64, linux/arm64) ctlbuild and
# ctlrun images to the registry below.
registry = registry.zerotier.com/zerotier

# None of these targets produces a file with the target's name. Declaring them
# phony keeps make from skipping a target when a file or directory called
# e.g. "buildx" or "all" happens to exist next to this Makefile.
.PHONY: all buildx controller-builder controller-runbase

# Default target: build and push both images.
all: controller-builder controller-runbase

# Registers binfmt handlers so foreign-architecture images can be built.
# NOTE(review): the "docker buildx create" / "inspect" lines are only echoed,
# not executed - presumably a reminder of the one-time builder setup; confirm.
buildx:
	@echo "docker buildx create"
# Alternative binfmt setup, kept for reference:
# docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
	docker run --privileged --rm tonistiigi/binfmt --install all
	@echo docker buildx create --name multiarch --driver docker-container --use
	@echo docker buildx inspect --bootstrap

# Build image, from Dockerfile.builder; pushed as $(registry)/ctlbuild:latest.
controller-builder: buildx
	docker buildx build --no-cache --platform linux/amd64,linux/arm64 -t $(registry)/ctlbuild:latest -f Dockerfile.builder . --push

# Runtime base image, from Dockerfile.run_base; pushed as $(registry)/ctlrun:latest.
controller-runbase: buildx
	docker buildx build --no-cache --platform linux/amd64,linux/arm64 -t $(registry)/ctlrun:latest -f Dockerfile.run_base . --push

View file

@ -64,6 +64,7 @@ fi
popd
DEFAULT_PORT=9993
DEFAULT_LB_MODE=false
APP_NAME="controller-$(cat /var/lib/zerotier-one/identity.public | cut -d ':' -f 1)"
@ -76,6 +77,7 @@ echo "{
\"inot\",
\"nat64\"
],
\"lowBandwidthMode\": ${ZT_LB_MODE:-$DEFAULT_LB_MODE},
\"ssoRedirectURL\": \"${ZT_SSO_REDIRECT_URL}\",
\"allowManagementFrom\": [\"127.0.0.1\", \"::1\", \"10.0.0.0/8\"],
${REDIS}

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more