mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-06-07 04:53:44 +02:00
AES software optimizations to make rpi4 faster.
This commit is contained in:
parent
d0cc3ac333
commit
78670aea58
5 changed files with 110 additions and 31 deletions
|
@ -23,19 +23,20 @@ func Help() {
|
||||||
(c)2013-2020 ZeroTier, Inc.
|
(c)2013-2020 ZeroTier, Inc.
|
||||||
Licensed under the ZeroTier BSL (see LICENSE.txt)
|
Licensed under the ZeroTier BSL (see LICENSE.txt)
|
||||||
|
|
||||||
Usage: zerotier [-options] <command> [command args]
|
Usage: zerotier [-global options] <command> [command args]
|
||||||
|
|
||||||
Global Options:
|
Global Options:
|
||||||
|
|
||||||
-j Output raw JSON where applicable
|
-j Output raw JSON where applicable
|
||||||
-p <path> Use alternate base path
|
-p <path> Use alternate base path
|
||||||
-t <path> Load secret auth token from a file
|
-t <path> Load secret auth token from a file
|
||||||
-T <token> Set secret auth token on command line
|
-T <token> Set secret auth token on command line
|
||||||
|
|
||||||
Commands:
|
Common Operations:
|
||||||
|
|
||||||
help Show this help
|
help Show this help
|
||||||
version Print version
|
version Print version
|
||||||
service [-options] Start node (see below)
|
|
||||||
-d Fork into background (Unix only)
|
|
||||||
status Show node status and configuration
|
status Show node status and configuration
|
||||||
|
|
||||||
join [-options] <network> Join a virtual network
|
join [-options] <network> Join a virtual network
|
||||||
|
@ -58,13 +59,17 @@ Commands:
|
||||||
show Show peer details (default)
|
show Show peer details (default)
|
||||||
try <endpoint> [...] Try peer at explicit endpoint
|
try <endpoint> [...] Try peer at explicit endpoint
|
||||||
|
|
||||||
set [option] [value] - Get or set a core config option
|
set [option] [value] - Get or set node configuration
|
||||||
port <port> Primary P2P port
|
port <port> Primary P2P port
|
||||||
secondaryport <port/0> Secondary P2P port (0 to disable)
|
secondaryport <port/0> Secondary P2P port (0 to disable)
|
||||||
blacklist cidr <IP/bits> <boolean> Toggle physical path blacklisting
|
blacklist cidr <IP/bits> <boolean> Toggle physical path blacklisting
|
||||||
blacklist if <prefix> <boolean> Toggle interface prefix blacklisting
|
blacklist if <prefix> <boolean> Toggle interface prefix blacklisting
|
||||||
portmap <boolean> Toggle use of uPnP or NAT-PMP
|
portmap <boolean> Toggle use of uPnP or NAT-PMP
|
||||||
|
|
||||||
|
Advanced Operations:
|
||||||
|
|
||||||
|
service Start node (seldom used from CLI)
|
||||||
|
|
||||||
controller <command> [option] - Local controller management commands
|
controller <command> [option] - Local controller management commands
|
||||||
networks List networks run by local controller
|
networks List networks run by local controller
|
||||||
new Create a new network
|
new Create a new network
|
||||||
|
|
119
core/AES.cpp
119
core/AES.cpp
|
@ -1057,31 +1057,99 @@ void AES::CTR::crypt(const void *const input, unsigned int len) noexcept
|
||||||
out += totalLen;
|
out += totalLen;
|
||||||
_len = (totalLen + len);
|
_len = (totalLen + len);
|
||||||
|
|
||||||
#ifdef ZT_NO_UNALIGNED_ACCESS
|
const uint32_t *const restrict rk = _aes._k.sw.ek;
|
||||||
if ((((uintptr_t)out | (uintptr_t)in) & 7U) == 0) { // if aligned we can do XORs in quadwords instead of bytes
|
const uint32_t m8 = 0xff;
|
||||||
#endif
|
const uint32_t ctr0rk0 = Utils::ntoh(reinterpret_cast<uint32_t *>(_ctr)[0]) ^ rk[0];
|
||||||
while (len >= 16) {
|
const uint32_t ctr1rk1 = Utils::ntoh(reinterpret_cast<uint32_t *>(_ctr)[1]) ^ rk[1];
|
||||||
_aes._encryptSW(reinterpret_cast<const uint8_t *>(_ctr), reinterpret_cast<uint8_t *>(keyStream));
|
const uint32_t ctr2rk2 = Utils::ntoh(reinterpret_cast<uint32_t *>(_ctr)[2]) ^ rk[2];
|
||||||
reinterpret_cast<uint32_t *>(_ctr)[3] = Utils::hton(++ctr);
|
const uint32_t m8_24 = 0xff000000;
|
||||||
reinterpret_cast<uint64_t *>(out)[0] = reinterpret_cast<const uint64_t *>(in)[0] ^ keyStream[0];
|
const uint32_t m8_16 = 0x00ff0000;
|
||||||
reinterpret_cast<uint64_t *>(out)[1] = reinterpret_cast<const uint64_t *>(in)[1] ^ keyStream[1];
|
const uint32_t m8_8 = 0x0000ff00;
|
||||||
out += 16;
|
while (len >= 16) {
|
||||||
len -= 16;
|
uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
|
||||||
in += 16;
|
s0 = ctr0rk0;
|
||||||
}
|
s1 = ctr1rk1;
|
||||||
#ifdef ZT_NO_UNALIGNED_ACCESS
|
s2 = ctr2rk2;
|
||||||
} else {
|
s3 = ctr++ ^ rk[3];
|
||||||
while (len >= 16) {
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[4];
|
||||||
_aes._encryptSW(reinterpret_cast<const uint8_t *>(_ctr),reinterpret_cast<uint8_t *>(keyStream));
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[5];
|
||||||
reinterpret_cast<uint32_t *>(_ctr)[3] = Utils::hton(++ctr);
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[6];
|
||||||
for (int i = 0;i < 16;++i)
|
t3 = Te0[s3 >> 24U] ^ Te1[(s0 >> 16U) & m8] ^ Te2[(s1 >> 8U) & m8] ^ Te3[s2 & m8] ^ rk[7];
|
||||||
out[i] = in[i] ^ reinterpret_cast<uint8_t *>(keyStream)[i];
|
s0 = Te0[t0 >> 24U] ^ Te1[(t1 >> 16U) & m8] ^ Te2[(t2 >> 8U) & m8] ^ Te3[t3 & m8] ^ rk[8];
|
||||||
out += 16;
|
s1 = Te0[t1 >> 24U] ^ Te1[(t2 >> 16U) & m8] ^ Te2[(t3 >> 8U) & m8] ^ Te3[t0 & m8] ^ rk[9];
|
||||||
len -= 16;
|
s2 = Te0[t2 >> 24U] ^ Te1[(t3 >> 16U) & m8] ^ Te2[(t0 >> 8U) & m8] ^ Te3[t1 & m8] ^ rk[10];
|
||||||
in += 16;
|
s3 = Te0[t3 >> 24U] ^ Te1[(t0 >> 16U) & m8] ^ Te2[(t1 >> 8U) & m8] ^ Te3[t2 & m8] ^ rk[11];
|
||||||
}
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[12];
|
||||||
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[13];
|
||||||
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[14];
|
||||||
|
t3 = Te0[s3 >> 24U] ^ Te1[(s0 >> 16U) & m8] ^ Te2[(s1 >> 8U) & m8] ^ Te3[s2 & m8] ^ rk[15];
|
||||||
|
s0 = Te0[t0 >> 24U] ^ Te1[(t1 >> 16U) & m8] ^ Te2[(t2 >> 8U) & m8] ^ Te3[t3 & m8] ^ rk[16];
|
||||||
|
s1 = Te0[t1 >> 24U] ^ Te1[(t2 >> 16U) & m8] ^ Te2[(t3 >> 8U) & m8] ^ Te3[t0 & m8] ^ rk[17];
|
||||||
|
s2 = Te0[t2 >> 24U] ^ Te1[(t3 >> 16U) & m8] ^ Te2[(t0 >> 8U) & m8] ^ Te3[t1 & m8] ^ rk[18];
|
||||||
|
s3 = Te0[t3 >> 24U] ^ Te1[(t0 >> 16U) & m8] ^ Te2[(t1 >> 8U) & m8] ^ Te3[t2 & m8] ^ rk[19];
|
||||||
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[20];
|
||||||
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[21];
|
||||||
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[22];
|
||||||
|
t3 = Te0[s3 >> 24U] ^ Te1[(s0 >> 16U) & m8] ^ Te2[(s1 >> 8U) & m8] ^ Te3[s2 & m8] ^ rk[23];
|
||||||
|
s0 = Te0[t0 >> 24U] ^ Te1[(t1 >> 16U) & m8] ^ Te2[(t2 >> 8U) & m8] ^ Te3[t3 & m8] ^ rk[24];
|
||||||
|
s1 = Te0[t1 >> 24U] ^ Te1[(t2 >> 16U) & m8] ^ Te2[(t3 >> 8U) & m8] ^ Te3[t0 & m8] ^ rk[25];
|
||||||
|
s2 = Te0[t2 >> 24U] ^ Te1[(t3 >> 16U) & m8] ^ Te2[(t0 >> 8U) & m8] ^ Te3[t1 & m8] ^ rk[26];
|
||||||
|
s3 = Te0[t3 >> 24U] ^ Te1[(t0 >> 16U) & m8] ^ Te2[(t1 >> 8U) & m8] ^ Te3[t2 & m8] ^ rk[27];
|
||||||
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[28];
|
||||||
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[29];
|
||||||
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[30];
|
||||||
|
t3 = Te0[s3 >> 24U] ^ Te1[(s0 >> 16U) & m8] ^ Te2[(s1 >> 8U) & m8] ^ Te3[s2 & m8] ^ rk[31];
|
||||||
|
s0 = Te0[t0 >> 24U] ^ Te1[(t1 >> 16U) & m8] ^ Te2[(t2 >> 8U) & m8] ^ Te3[t3 & m8] ^ rk[32];
|
||||||
|
s1 = Te0[t1 >> 24U] ^ Te1[(t2 >> 16U) & m8] ^ Te2[(t3 >> 8U) & m8] ^ Te3[t0 & m8] ^ rk[33];
|
||||||
|
s2 = Te0[t2 >> 24U] ^ Te1[(t3 >> 16U) & m8] ^ Te2[(t0 >> 8U) & m8] ^ Te3[t1 & m8] ^ rk[34];
|
||||||
|
s3 = Te0[t3 >> 24U] ^ Te1[(t0 >> 16U) & m8] ^ Te2[(t1 >> 8U) & m8] ^ Te3[t2 & m8] ^ rk[35];
|
||||||
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[36];
|
||||||
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[37];
|
||||||
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[38];
|
||||||
|
t3 = Te0[s3 >> 24U] ^ Te1[(s0 >> 16U) & m8] ^ Te2[(s1 >> 8U) & m8] ^ Te3[s2 & m8] ^ rk[39];
|
||||||
|
s0 = Te0[t0 >> 24U] ^ Te1[(t1 >> 16U) & m8] ^ Te2[(t2 >> 8U) & m8] ^ Te3[t3 & m8] ^ rk[40];
|
||||||
|
s1 = Te0[t1 >> 24U] ^ Te1[(t2 >> 16U) & m8] ^ Te2[(t3 >> 8U) & m8] ^ Te3[t0 & m8] ^ rk[41];
|
||||||
|
s2 = Te0[t2 >> 24U] ^ Te1[(t3 >> 16U) & m8] ^ Te2[(t0 >> 8U) & m8] ^ Te3[t1 & m8] ^ rk[42];
|
||||||
|
s3 = Te0[t3 >> 24U] ^ Te1[(t0 >> 16U) & m8] ^ Te2[(t1 >> 8U) & m8] ^ Te3[t2 & m8] ^ rk[43];
|
||||||
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[44];
|
||||||
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[45];
|
||||||
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[46];
|
||||||
|
t3 = Te0[s3 >> 24U] ^ Te1[(s0 >> 16U) & m8] ^ Te2[(s1 >> 8U) & m8] ^ Te3[s2 & m8] ^ rk[47];
|
||||||
|
s0 = Te0[t0 >> 24U] ^ Te1[(t1 >> 16U) & m8] ^ Te2[(t2 >> 8U) & m8] ^ Te3[t3 & m8] ^ rk[48];
|
||||||
|
s1 = Te0[t1 >> 24U] ^ Te1[(t2 >> 16U) & m8] ^ Te2[(t3 >> 8U) & m8] ^ Te3[t0 & m8] ^ rk[49];
|
||||||
|
s2 = Te0[t2 >> 24U] ^ Te1[(t3 >> 16U) & m8] ^ Te2[(t0 >> 8U) & m8] ^ Te3[t1 & m8] ^ rk[50];
|
||||||
|
s3 = Te0[t3 >> 24U] ^ Te1[(t0 >> 16U) & m8] ^ Te2[(t1 >> 8U) & m8] ^ Te3[t2 & m8] ^ rk[51];
|
||||||
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[52];
|
||||||
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[53];
|
||||||
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[54];
|
||||||
|
t3 = Te0[s3 >> 24U] ^ Te1[(s0 >> 16U) & m8] ^ Te2[(s1 >> 8U) & m8] ^ Te3[s2 & m8] ^ rk[55];
|
||||||
|
s0 = (Te2[(t0 >> 24U)] & m8_24) ^ (Te3[(t1 >> 16U) & m8] & m8_16) ^ (Te0[(t2 >> 8U) & m8] & m8_8) ^ (Te1[(t3) & m8] & m8) ^ rk[56];
|
||||||
|
s1 = (Te2[(t1 >> 24U)] & m8_24) ^ (Te3[(t2 >> 16U) & m8] & m8_16) ^ (Te0[(t3 >> 8U) & m8] & m8_8) ^ (Te1[(t0) & m8] & m8) ^ rk[57];
|
||||||
|
s2 = (Te2[(t2 >> 24U)] & m8_24) ^ (Te3[(t3 >> 16U) & m8] & m8_16) ^ (Te0[(t0 >> 8U) & m8] & m8_8) ^ (Te1[(t1) & m8] & m8) ^ rk[58];
|
||||||
|
s3 = (Te2[(t3 >> 24U)] & m8_24) ^ (Te3[(t0 >> 16U) & m8] & m8_16) ^ (Te0[(t1 >> 8U) & m8] & m8_8) ^ (Te1[(t2) & m8] & m8) ^ rk[59];
|
||||||
|
|
||||||
|
out[0] = in[0] ^ (uint8_t)(s0 >> 24U);
|
||||||
|
out[1] = in[1] ^ (uint8_t)(s0 >> 16U);
|
||||||
|
out[2] = in[2] ^ (uint8_t)(s0 >> 8U);
|
||||||
|
out[3] = in[3] ^ (uint8_t)s0;
|
||||||
|
out[4] = in[4] ^ (uint8_t)(s1 >> 24U);
|
||||||
|
out[5] = in[5] ^ (uint8_t)(s1 >> 16U);
|
||||||
|
out[6] = in[6] ^ (uint8_t)(s1 >> 8U);
|
||||||
|
out[7] = in[7] ^ (uint8_t)s1;
|
||||||
|
out[8] = in[8] ^ (uint8_t)(s2 >> 24U);
|
||||||
|
out[9] = in[9] ^ (uint8_t)(s2 >> 16U);
|
||||||
|
out[10] = in[10] ^ (uint8_t)(s2 >> 8U);
|
||||||
|
out[11] = in[11] ^ (uint8_t)s2;
|
||||||
|
out[12] = in[12] ^ (uint8_t)(s3 >> 24U);
|
||||||
|
out[13] = in[13] ^ (uint8_t)(s3 >> 16U);
|
||||||
|
out[14] = in[14] ^ (uint8_t)(s3 >> 8U);
|
||||||
|
out[15] = in[15] ^ (uint8_t)s3;
|
||||||
|
|
||||||
|
out += 16;
|
||||||
|
len -= 16;
|
||||||
|
in += 16;
|
||||||
}
|
}
|
||||||
#endif
|
reinterpret_cast<uint32_t *>(_ctr)[3] = Utils::hton(ctr);
|
||||||
|
|
||||||
// Any remaining input is placed in _out. This will be picked up and crypted
|
// Any remaining input is placed in _out. This will be picked up and crypted
|
||||||
// on subsequent calls to crypt() or finish() as it'll mean _len will not be
|
// on subsequent calls to crypt() or finish() as it'll mean _len will not be
|
||||||
|
@ -1246,11 +1314,12 @@ void AES::_encryptSW(const uint8_t in[16], uint8_t out[16]) const noexcept
|
||||||
{
|
{
|
||||||
const uint32_t *const restrict rk = _k.sw.ek;
|
const uint32_t *const restrict rk = _k.sw.ek;
|
||||||
const uint32_t m8 = 0xff;
|
const uint32_t m8 = 0xff;
|
||||||
uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
|
uint32_t s0, s1, s2, s3;
|
||||||
s0 = Utils::loadBigEndian< uint32_t >(in) ^ rk[0];
|
s0 = Utils::loadBigEndian< uint32_t >(in) ^ rk[0];
|
||||||
s1 = Utils::loadBigEndian< uint32_t >(in + 4) ^ rk[1];
|
s1 = Utils::loadBigEndian< uint32_t >(in + 4) ^ rk[1];
|
||||||
s2 = Utils::loadBigEndian< uint32_t >(in + 8) ^ rk[2];
|
s2 = Utils::loadBigEndian< uint32_t >(in + 8) ^ rk[2];
|
||||||
s3 = Utils::loadBigEndian< uint32_t >(in + 12) ^ rk[3];
|
s3 = Utils::loadBigEndian< uint32_t >(in + 12) ^ rk[3];
|
||||||
|
uint32_t t0, t1, t2, t3;
|
||||||
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[4];
|
t0 = Te0[s0 >> 24U] ^ Te1[(s1 >> 16U) & m8] ^ Te2[(s2 >> 8U) & m8] ^ Te3[s3 & m8] ^ rk[4];
|
||||||
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[5];
|
t1 = Te0[s1 >> 24U] ^ Te1[(s2 >> 16U) & m8] ^ Te2[(s3 >> 8U) & m8] ^ Te3[s0 & m8] ^ rk[5];
|
||||||
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[6];
|
t2 = Te0[s2 >> 24U] ^ Te1[(s3 >> 16U) & m8] ^ Te2[(s0 >> 8U) & m8] ^ Te3[s1 & m8] ^ rk[6];
|
||||||
|
|
|
@ -18,6 +18,8 @@
|
||||||
#include "Utils.hpp"
|
#include "Utils.hpp"
|
||||||
#include "SHA512.hpp"
|
#include "SHA512.hpp"
|
||||||
|
|
||||||
|
//#define ZT_AES_NO_ACCEL
|
||||||
|
|
||||||
#if !defined(ZT_AES_NO_ACCEL) && defined(ZT_ARCH_X64)
|
#if !defined(ZT_AES_NO_ACCEL) && defined(ZT_ARCH_X64)
|
||||||
#define ZT_AES_AESNI 1
|
#define ZT_AES_AESNI 1
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -37,7 +37,8 @@ template< typename V >
|
||||||
class Vector : public std::vector< V >
|
class Vector : public std::vector< V >
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
ZT_INLINE Vector()
|
ZT_INLINE Vector() :
|
||||||
|
std::vector< V >()
|
||||||
{}
|
{}
|
||||||
|
|
||||||
template< typename I >
|
template< typename I >
|
||||||
|
|
|
@ -26,6 +26,8 @@ import (
|
||||||
"unsafe"
|
"unsafe"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// TODO: export keys in ssh format?
|
||||||
|
|
||||||
const (
|
const (
|
||||||
IdentityTypeC25519 = 0
|
IdentityTypeC25519 = 0
|
||||||
IdentityTypeP384 = 1
|
IdentityTypeP384 = 1
|
||||||
|
|
Loading…
Add table
Reference in a new issue