mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-06-07 04:53:44 +02:00
Some cleanup and optimization.
This commit is contained in:
parent
e213317532
commit
bd0299f392
3 changed files with 60 additions and 9 deletions
|
@ -39,6 +39,7 @@
|
||||||
#include <WinSock2.h>
|
#include <WinSock2.h>
|
||||||
#include <ws2tcpip.h>
|
#include <ws2tcpip.h>
|
||||||
#include <Windows.h>
|
#include <Windows.h>
|
||||||
|
#include <memoryapi.h>
|
||||||
#include <shlwapi.h>
|
#include <shlwapi.h>
|
||||||
#include <Shlobj.h>
|
#include <Shlobj.h>
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
|
|
|
@ -122,7 +122,7 @@ static void sha512_process(sha512_state *const md,const uint8_t *in,unsigned lon
|
||||||
inlen -= 128;
|
inlen -= 128;
|
||||||
} else {
|
} else {
|
||||||
unsigned long n = std::min(inlen,(128 - md->curlen));
|
unsigned long n = std::min(inlen,(128 - md->curlen));
|
||||||
memcpy(md->buf + md->curlen,in,n);
|
Utils::copy(md->buf + md->curlen,in,n);
|
||||||
md->curlen += n;
|
md->curlen += n;
|
||||||
in += n;
|
in += n;
|
||||||
inlen -= n;
|
inlen -= n;
|
||||||
|
@ -179,7 +179,7 @@ void SHA384(void *digest,const void *data,unsigned int len)
|
||||||
sha384_init(&state);
|
sha384_init(&state);
|
||||||
sha512_process(&state,(uint8_t *)data,(unsigned long)len);
|
sha512_process(&state,(uint8_t *)data,(unsigned long)len);
|
||||||
sha512_done(&state,tmp);
|
sha512_done(&state,tmp);
|
||||||
memcpy(digest,tmp,48);
|
Utils::copy<48>(digest,tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SHA384(void *digest,const void *data0,unsigned int len0,const void *data1,unsigned int len1)
|
void SHA384(void *digest,const void *data0,unsigned int len0,const void *data1,unsigned int len1)
|
||||||
|
@ -190,7 +190,7 @@ void SHA384(void *digest,const void *data0,unsigned int len0,const void *data1,u
|
||||||
sha512_process(&state,(uint8_t *)data0,(unsigned long)len0);
|
sha512_process(&state,(uint8_t *)data0,(unsigned long)len0);
|
||||||
sha512_process(&state,(uint8_t *)data1,(unsigned long)len1);
|
sha512_process(&state,(uint8_t *)data1,(unsigned long)len1);
|
||||||
sha512_done(&state,tmp);
|
sha512_done(&state,tmp);
|
||||||
memcpy(digest,tmp,48);
|
Utils::copy<48>(digest,tmp);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif // !ZT_HAVE_NATIVE_SHA512
|
#endif // !ZT_HAVE_NATIVE_SHA512
|
||||||
|
|
|
@ -55,7 +55,6 @@ namespace Utils {
|
||||||
#define ZT_ROL32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
|
#define ZT_ROL32(x, r) (((x) << (r)) | ((x) >> (32 - (r))))
|
||||||
|
|
||||||
#ifdef ZT_ARCH_X64
|
#ifdef ZT_ARCH_X64
|
||||||
|
|
||||||
struct CPUIDRegisters
|
struct CPUIDRegisters
|
||||||
{
|
{
|
||||||
CPUIDRegisters() noexcept;
|
CPUIDRegisters() noexcept;
|
||||||
|
@ -70,7 +69,6 @@ struct CPUIDRegisters
|
||||||
bool sha;
|
bool sha;
|
||||||
bool fsrm;
|
bool fsrm;
|
||||||
};
|
};
|
||||||
|
|
||||||
extern const CPUIDRegisters CPUID;
|
extern const CPUIDRegisters CPUID;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -104,7 +102,9 @@ extern const uint64_t s_mapNonce;
|
||||||
*/
|
*/
|
||||||
static ZT_INLINE void memoryLock(const void *const p, const unsigned int l) noexcept
{
	// Best effort: the result of the underlying OS call is intentionally not checked.
#ifdef __WINDOWS__
	// VirtualLock() takes a non-const LPVOID, and C++ will not implicitly drop
	// const from a pointer, so the qualifier has to be removed explicitly. The
	// call only changes paging attributes; it does not write through the pointer.
	VirtualLock(const_cast<void *>(p), l);
#else
	mlock(p, l);
#endif
}
|
||||||
|
@ -117,7 +117,9 @@ static ZT_INLINE void memoryLock(const void *const p, const unsigned int l) noex
|
||||||
*/
|
*/
|
||||||
static ZT_INLINE void memoryUnlock(const void *const p, const unsigned int l) noexcept
{
	// Best effort: the result of the underlying OS call is intentionally not checked.
#ifdef __WINDOWS__
	// VirtualUnlock() takes a non-const LPVOID, and C++ will not implicitly drop
	// const from a pointer, so the qualifier has to be removed explicitly. The
	// call only changes paging attributes; it does not write through the pointer.
	VirtualUnlock(const_cast<void *>(p), l);
#else
	munlock(p, l);
#endif
}
|
||||||
|
@ -695,6 +697,23 @@ static ZT_INLINE void storeLittleEndian(void *const p, const I i) noexcept
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Note on copy() and zero():
|
||||||
|
*
|
||||||
|
* On X64, rep/movsb and rep/stosb are almost always faster for small memory
|
||||||
|
* regions on all but the oldest microarchitectures (and even there the
|
||||||
|
* difference is not large). While more aggressive memcpy() implementations
|
||||||
|
* may be faster in micro-benchmarks, these fail to account for real world
|
||||||
|
* context such as instruction cache and pipeline pressure. A simple
|
||||||
|
* instruction like rep/movsb takes up only a few spots in caches and pipelines
|
||||||
|
* and requires no branching or function calls. Specialized memcpy() can still
|
||||||
|
* be faster for large memory regions, but ZeroTier doesn't copy anything
|
||||||
|
* much larger than 16KiB.
|
||||||
|
*
|
||||||
|
* A templated version for statically known sizes is provided since this can
|
||||||
|
* allow some nice optimizations in some cases.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy memory block whose size is known at compile time.
|
* Copy memory block whose size is known at compile time.
|
||||||
*
|
*
|
||||||
|
@ -706,13 +725,44 @@ template< unsigned long L >
|
||||||
static ZT_INLINE void copy(void *dest, const void *src) noexcept
{
#if defined(ZT_ARCH_X64) && defined(__GNUC__)
	// rep movsb consumes and updates RCX (byte count), RSI (source) and RDI
	// (destination), so all three are declared as read-write operands.
	uintptr_t l = L;
	// The "memory" clobber tells the compiler that this statement reads and
	// writes memory not named in the operand list. Without it the optimizer may
	// reorder or drop surrounding loads/stores to the copied buffers, because
	// "volatile" alone only prevents the asm statement itself from being removed.
	asm volatile ("cld ; rep movsb" : "+c"(l), "+S"(src), "+D"(dest) : : "memory");
#else
	memcpy(dest, src, L);
#endif
}
|
||||||
|
|
||||||
|
// Avoid rep/movsb startup time for some small common sizes.
|
||||||
|
template<>
|
||||||
|
ZT_INLINE void copy<4>(void *dest, const void *src) noexcept
|
||||||
|
{
|
||||||
|
*reinterpret_cast<uint32_t *>(dest) = *reinterpret_cast<const uint32_t *>(src);
|
||||||
|
}
|
||||||
|
template<>
|
||||||
|
ZT_INLINE void copy<8>(void *dest, const void *src) noexcept
|
||||||
|
{
|
||||||
|
*reinterpret_cast<uint64_t *>(dest) = *reinterpret_cast<const uint64_t *>(src);
|
||||||
|
}
|
||||||
|
template<>
|
||||||
|
ZT_INLINE void copy<12>(void *dest, const void *src) noexcept
|
||||||
|
{
|
||||||
|
*reinterpret_cast<uint64_t *>(dest) = *reinterpret_cast<const uint64_t *>(src);
|
||||||
|
*reinterpret_cast<uint32_t *>(reinterpret_cast<uint8_t *>(dest) + 8) = *reinterpret_cast<const uint32_t *>(reinterpret_cast<const uint8_t *>(src) + 8);
|
||||||
|
}
|
||||||
|
template<>
|
||||||
|
ZT_INLINE void copy<16>(void *dest, const void *src) noexcept
|
||||||
|
{
|
||||||
|
*reinterpret_cast<uint64_t *>(dest) = *reinterpret_cast<const uint64_t *>(src);
|
||||||
|
*reinterpret_cast<uint64_t *>(reinterpret_cast<uint8_t *>(dest) + 8) = *reinterpret_cast<const uint64_t *>(reinterpret_cast<const uint8_t *>(src) + 8);
|
||||||
|
}
|
||||||
|
template<>
|
||||||
|
ZT_INLINE void copy<24>(void *dest, const void *src) noexcept
|
||||||
|
{
|
||||||
|
*reinterpret_cast<uint64_t *>(dest) = *reinterpret_cast<const uint64_t *>(src);
|
||||||
|
*reinterpret_cast<uint64_t *>(reinterpret_cast<uint8_t *>(dest) + 8) = *reinterpret_cast<const uint64_t *>(reinterpret_cast<const uint8_t *>(src) + 8);
|
||||||
|
*reinterpret_cast<uint64_t *>(reinterpret_cast<uint8_t *>(dest) + 16) = *reinterpret_cast<const uint64_t *>(reinterpret_cast<const uint8_t *>(src) + 16);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Copy memory block whose size is known at run time
|
* Copy memory block whose size is known at run time
|
||||||
*
|
*
|
||||||
|
@ -739,7 +789,7 @@ template< unsigned long L >
|
||||||
static ZT_INLINE void zero(void *dest) noexcept
|
static ZT_INLINE void zero(void *dest) noexcept
|
||||||
{
|
{
|
||||||
#if defined(ZT_ARCH_X64) && defined(__GNUC__)
|
#if defined(ZT_ARCH_X64) && defined(__GNUC__)
|
||||||
unsigned long l = L;
|
uintptr_t l = L;
|
||||||
asm volatile ("cld ; rep stosb" :"+c" (l), "+D" (dest) : "a" (0));
|
asm volatile ("cld ; rep stosb" :"+c" (l), "+D" (dest) : "a" (0));
|
||||||
#else
|
#else
|
||||||
memset(dest, 0, L);
|
memset(dest, 0, L);
|
||||||
|
|
Loading…
Add table
Reference in a new issue