mirror of
https://github.com/zerotier/ZeroTierOne.git
synced 2025-06-06 20:43:44 +02:00
Some perf stuff, docs, HELLO design tweaks for ephemeral keys.
This commit is contained in:
parent
1f2e94a51d
commit
369df245e3
3 changed files with 147 additions and 243 deletions
BIN
doc/2015-GCM-SIV.pdf
Normal file
BIN
doc/2015-GCM-SIV.pdf
Normal file
Binary file not shown.
|
@ -252,17 +252,17 @@
|
||||||
/**
|
/**
|
||||||
* HELLO exchange meta-data: ephemeral C25519 public key
|
* HELLO exchange meta-data: ephemeral C25519 public key
|
||||||
*/
|
*/
|
||||||
#define ZT_PROTO_HELLO_NODE_META_EPHEMERAL_KEY_C25519 "e0"
|
#define ZT_PROTO_HELLO_NODE_META_EPHEMERAL_C25519 "e0"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HELLO exchange meta-data: ephemeral NIST P-384 public key
|
* HELLO exchange meta-data: ephemeral NIST P-384 public key
|
||||||
*/
|
*/
|
||||||
#define ZT_PROTO_HELLO_NODE_META_EPHEMERAL_KEY_P384 "e1"
|
#define ZT_PROTO_HELLO_NODE_META_EPHEMERAL_P384 "e1"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HELLO exchange meta-data: address(es) of nodes to whom this node will relay
|
* HELLO exchange meta-data: address(es) of nodes to whom this node will relay
|
||||||
*/
|
*/
|
||||||
#define ZT_PROTO_HELLO_NODE_META_WILL_RELAY_TO "wr"
|
#define ZT_PROTO_HELLO_NODE_META_NEIGHBORS "wr"
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* HELLO exchange meta-data: X coordinate of your node (sent in OK(HELLO))
|
* HELLO exchange meta-data: X coordinate of your node (sent in OK(HELLO))
|
||||||
|
@ -304,53 +304,93 @@ enum Verb
|
||||||
/**
|
/**
|
||||||
* Announcement of a node's existence and vitals:
|
* Announcement of a node's existence and vitals:
|
||||||
* <[1] protocol version>
|
* <[1] protocol version>
|
||||||
* <[1] software major version>
|
* <[1] software major version (LEGACY)>
|
||||||
* <[1] software minor version>
|
* <[1] software minor version (LEGACY)>
|
||||||
* <[2] software revision>
|
* <[2] software revision (LEGACY)>
|
||||||
* <[8] timestamp for determining latency>
|
* <[8] timestamp for determining latency (LEGACY)>
|
||||||
* <[...] binary serialized identity>
|
* <[...] binary serialized identity>
|
||||||
* <[...] physical destination address of packet>
|
* <[...] physical destination address of packet (LEGACY)>
|
||||||
* [... begin encrypted region ...]
|
* <[2] 16-bit reserved "encrypted zero" field (LEGACY)>
|
||||||
* <[2] 16-bit reserved (legacy) field, always 0>
|
* <[...] encrypted dictionary>
|
||||||
* <[2] 16-bit length of meta-data dictionary>
|
* <[2] 16-bit length of preceding encrypted dictionary>
|
||||||
* <[...] meta-data dictionary>
|
* <[48] HMAC-SHA384 of plaintext packet (with hops masked to 0)>
|
||||||
* <[2] 16-bit length of any additional fields>
|
|
||||||
* [... end encrypted region ...]
|
|
||||||
* <[48] HMAC-SHA384 of packet (with hops field masked to 0)>
|
|
||||||
*
|
*
|
||||||
* HELLO is sent using the POLY1305_NONE cipher setting (MAC but
|
* HELLO is sent to initiate a new pairing between two nodes.
|
||||||
* no encryption) and as of protocol version 11 contains an extra
|
|
||||||
* HMAC-SHA384 MAC for additional authentication hardening.
|
|
||||||
*
|
*
|
||||||
* The physical destination address is the raw InetAddress to which the
|
* HELLO is the only packet ever sent without normal payload encryption,
|
||||||
* packet was sent, regardless of any relaying used.
|
* though an inner encrypted envelope exists to obscure all fields that
|
||||||
|
* do not need to be sent in the clear. HELLO's MAC field contains a
|
||||||
|
* Poly1305 MAC for backward compatibility, and v2.x adds an additional
|
||||||
|
* HMAC-SHA384 at the end for stronger authentication of sessions. HELLO
|
||||||
|
* authentication is performed using the long-lived identity key only,
|
||||||
|
* and the encryption of the inner dictionary field is done using a key
|
||||||
|
* derived from this identity key explicitly for this purpose.
|
||||||
*
|
*
|
||||||
* HELLO packets have an encrypted section that is encrypted with
|
* The main payload of HELLO is the protocol version and the full identity
|
||||||
* Salsa20/12 using the two peers' long-term negotiated keys and with
|
* of the sender, which includes the sender's public key(s). An encrypted
|
||||||
* the packet ID (with least significant 3 bits masked to 0 for legacy
|
* dictionary (key/value store) is also included for additional information.
|
||||||
* reasons) as the Salsa20/12 IV. This encryption is technically not
|
* This is encrypted using AES-CTR with a derived key and using the final
|
||||||
* necessary but serves to protect the privacy of locators and other
|
* 96 bits of the packet's HMAC-SHA384 as the CTR IV. (The HMAC authenticates
|
||||||
* fields for a little added defense in depth. Note to auditors: for FIPS
|
* the packet prior to this field being encrypted, making this a SIV
|
||||||
* or other auditing purposes this crypto can be ignored as its
|
* construction much like AES-GMAC-SIV.)
|
||||||
* compromise poses no risk to peer or network authentication or transport
|
*
|
||||||
* data privacy. HMAC is computed after this encryption is performed and
|
* The length of the dictionary field is included immediately after it so
|
||||||
* is verified before decryption is performed.
|
* that it can be decrypted and the HMAC validated without performing any
|
||||||
|
* parsing of anything else, since it's a good idea to authenticate any
|
||||||
|
* message as early as possible in any secure protocol.
|
||||||
|
*
|
||||||
|
* V1.x will ignore the HMAC and dictionary fields as it doesn't understand
|
||||||
|
* them, but the packet is constructed so that 1.x nodes will parse what
|
||||||
|
* they need to communicate with 2.x nodes (without forward secrecy) as long
|
||||||
|
* as we wish to support this.
|
||||||
|
*
|
||||||
|
* Several legacy fields are present as well for the benefit of 1.x nodes.
|
||||||
|
* These will go away and become simple reserved space once 1.x is no longer
|
||||||
|
* supported. Some are self-explanatory. The "encrypted zero" is rather
|
||||||
|
* strange. It's a 16-bit zero value encrypted using Salsa20/12 and the
|
||||||
|
* long-lived identity key shared by the two peers. It tells 1.x that an
|
||||||
|
* old encrypted field is no longer there and that it should stop parsing
|
||||||
|
* the packet at that point.
|
||||||
|
*
|
||||||
|
* The following fields are nearly always present and must exist to support
|
||||||
|
* forward secrecy (in the case of the instance ID, keys, and key revision)
|
||||||
|
* or federated root membership (in the case of the locator).
|
||||||
|
*
|
||||||
|
* TIMESTAMP - node's timestamp in milliseconds (supersedes legacy field)
|
||||||
|
* INSTANCE_ID - a 64-bit unique value generated on each node start
|
||||||
|
* EPHEMERAL_C25519 - an ephemeral Curve25519 public key
|
||||||
|
* EPHEMERAL_P384 - an ephemeral NIST P-384 public key
|
||||||
|
* EPHEMERAL_REVISION - 64-bit monotonically increasing per-instance counter
|
||||||
|
* LOCATOR - signed record enumerating this node's trusted contact points
|
||||||
|
*
|
||||||
|
* The following optional fields may also be present:
|
||||||
|
*
|
||||||
|
* NAME - arbitrary short user-defined name for this node
|
||||||
|
* CONTACT - arbitrary short contact information string for this node
|
||||||
|
* NEIGHBORS - addresses of node(s) to whom we'll relay (mesh-like routing)
|
||||||
|
* LOC_X, LOC_Y, LOC_Z - location relative to the nearest large center of mass
|
||||||
|
* PEER_LOC_X, PEER_LOC_Y, PEER_LOC_Z - where sender thinks peer is located
|
||||||
|
* SOFTWARE_VENDOR - short name or description of vendor, such as a URL
|
||||||
|
* SOFTWARE_VERSION - major, minor, revision, and build as 16-bit integers
|
||||||
|
* PHYSICAL_DEST - serialized Endpoint to which this message was sent
|
||||||
|
* VIRTUAL_DEST - ZeroTier address of first hop (if first hop wasn't destination)
|
||||||
|
* COMPLIANCE - bit mask containing bits for e.g. a FIPS-compliant node
|
||||||
*
|
*
|
||||||
* A valid and successfully authenticated HELLO will generate the following
|
* A valid and successfully authenticated HELLO will generate the following
|
||||||
* OK response which contains much of the same information about the
|
* OK response. It contains an echo of the timestamp supplied by the
|
||||||
* responding peer.
|
* initiating peer, the protocol version, and a dictionary containing
|
||||||
|
* the same information about the responding peer as the originating peer
|
||||||
|
* sent.
|
||||||
*
|
*
|
||||||
* OK payload:
|
* OK payload:
|
||||||
* <[8] timestamp echoed from original HELLO packet>
|
* <[8] timestamp echoed from original HELLO>
|
||||||
* <[1] protocol version>
|
* <[1] protocol version>
|
||||||
* <[1] software major version>
|
* <[1] software major version (LEGACY)>
|
||||||
* <[1] software minor version>
|
* <[1] software minor version (LEGACY)>
|
||||||
* <[2] software revision>
|
* <[2] software revision (LEGACY)>
|
||||||
* <[...] physical destination address of packet>
|
* <[...] physical destination address of packet (LEGACY)>
|
||||||
* <[2] 16-bit reserved (legacy) field, currently must be 0>
|
* <[2] 16-bit reserved zero field (LEGACY)>
|
||||||
* <[2] 16-bit length of meta-data dictionary>
|
* <[...] dictionary>
|
||||||
* <[...] meta-data dictionary>
|
|
||||||
* <[2] 16-bit length of any additional fields>
|
|
||||||
* <[48] HMAC-SHA384 of plaintext packet (with hops masked to 0)>
|
* <[48] HMAC-SHA384 of plaintext packet (with hops masked to 0)>
|
||||||
*/
|
*/
|
||||||
VERB_HELLO = 0x01,
|
VERB_HELLO = 0x01,
|
||||||
|
|
264
node/Utils.hpp
264
node/Utils.hpp
|
@ -583,99 +583,18 @@ static ZT_INLINE void storeLittleEndian(void *const p,const I i) noexcept
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
template<unsigned int L>
|
/**
|
||||||
static ZT_INLINE void copy(void *dest,const void *src) noexcept;
|
* Copy memory block whose size is known at compile time
|
||||||
template<>
|
*
|
||||||
ZT_INLINE void copy<64>(void *const dest,const void *const src) noexcept
|
* @tparam L Size of memory
|
||||||
{
|
* @param dest Destination memory
|
||||||
#ifdef ZT_ARCH_X64
|
* @param src Source memory
|
||||||
__m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
|
*/
|
||||||
__m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src) + 1);
|
|
||||||
__m128i c = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src) + 2);
|
|
||||||
__m128i d = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src) + 3);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest),a);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,b);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 2,c);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 3,d);
|
|
||||||
#else
|
|
||||||
uint64_t a = reinterpret_cast<const uint64_t *>(src)[0];
|
|
||||||
uint64_t b = reinterpret_cast<const uint64_t *>(src)[1];
|
|
||||||
uint64_t c = reinterpret_cast<const uint64_t *>(src)[2];
|
|
||||||
uint64_t d = reinterpret_cast<const uint64_t *>(src)[3];
|
|
||||||
uint64_t e = reinterpret_cast<const uint64_t *>(src)[4];
|
|
||||||
uint64_t f = reinterpret_cast<const uint64_t *>(src)[5];
|
|
||||||
uint64_t g = reinterpret_cast<const uint64_t *>(src)[6];
|
|
||||||
uint64_t h = reinterpret_cast<const uint64_t *>(src)[7];
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[0] = a;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[1] = b;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[2] = c;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[3] = d;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[4] = e;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[5] = f;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[6] = g;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[7] = h;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void copy<32>(void *const dest,const void *const src) noexcept
|
|
||||||
{
|
|
||||||
#ifdef ZT_ARCH_X64
|
|
||||||
__m128i a = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src));
|
|
||||||
__m128i b = _mm_loadu_si128(reinterpret_cast<const __m128i *>(src) + 1);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest),a);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,b);
|
|
||||||
#else
|
|
||||||
uint64_t a = reinterpret_cast<const uint64_t *>(src)[0];
|
|
||||||
uint64_t b = reinterpret_cast<const uint64_t *>(src)[1];
|
|
||||||
uint64_t c = reinterpret_cast<const uint64_t *>(src)[2];
|
|
||||||
uint64_t d = reinterpret_cast<const uint64_t *>(src)[3];
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[0] = a;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[1] = b;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[2] = c;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[3] = d;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void copy<16>(void *const dest,const void *const src) noexcept
|
|
||||||
{
|
|
||||||
#ifdef ZT_ARCH_X64
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest),_mm_loadu_si128(reinterpret_cast<const __m128i *>(src)));
|
|
||||||
#else
|
|
||||||
uint64_t a = reinterpret_cast<const uint64_t *>(src)[0];
|
|
||||||
uint64_t b = reinterpret_cast<const uint64_t *>(src)[1];
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[0] = a;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[1] = b;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void copy<8>(void *const dest,const void *const src) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint64_t *>(dest) = *reinterpret_cast<const uint64_t *>(src);
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void copy<4>(void *const dest,const void *const src) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint32_t *>(dest) = *reinterpret_cast<const uint32_t *>(src);
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void copy<2>(void *const dest,const void *const src) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint16_t *>(dest) = *reinterpret_cast<const uint16_t *>(src);
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void copy<1>(void *const dest,const void *const src) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint8_t *>(dest) = *reinterpret_cast<const uint8_t *>(src);
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void copy<0>(void *const dest,const void *const src) noexcept
|
|
||||||
{
|
|
||||||
}
|
|
||||||
template<unsigned int L>
|
template<unsigned int L>
|
||||||
static ZT_INLINE void copy(void *const dest,const void *const src) noexcept
|
static ZT_INLINE void copy(void *const dest,const void *const src) noexcept
|
||||||
{
|
{
|
||||||
#ifdef ZT_NO_UNALIGNED_ACCESS
|
#ifdef ZT_NO_UNALIGNED_ACCESS
|
||||||
if ((((uintptr_t)dest | (uintptr_t)src) & 7U) != 0) {
|
if ((((uintptr_t)dest | (uintptr_t)src) & (sizeof(uintptr_t) - 1)) != 0) {
|
||||||
memcpy(dest,src,L);
|
memcpy(dest,src,L);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -684,154 +603,99 @@ static ZT_INLINE void copy(void *const dest,const void *const src) noexcept
|
||||||
uint8_t *d = reinterpret_cast<uint8_t *>(dest);
|
uint8_t *d = reinterpret_cast<uint8_t *>(dest);
|
||||||
const uint8_t *s = reinterpret_cast<const uint8_t *>(src);
|
const uint8_t *s = reinterpret_cast<const uint8_t *>(src);
|
||||||
|
|
||||||
|
#ifdef ZT_ARCH_X64
|
||||||
for(unsigned int i=0;i<(L / 64U);++i) {
|
for(unsigned int i=0;i<(L / 64U);++i) {
|
||||||
copy<64>(d,s);
|
__m128i x0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s));
|
||||||
|
__m128i x1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s) + 1);
|
||||||
|
__m128i x2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s) + 2);
|
||||||
|
__m128i x3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s) + 3);
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(d),x0);
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(d) + 1,x1);
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(d) + 2,x2);
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(d) + 3,x3);
|
||||||
d += 64;
|
d += 64;
|
||||||
s += 64;
|
s += 64;
|
||||||
}
|
}
|
||||||
if ((L & 63U) >= 32U) {
|
if ((L & 63U) >= 32U) {
|
||||||
copy<32>(d,s);
|
__m128i x0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s));
|
||||||
|
__m128i x1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s) + 1);
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(d),x0);
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(d) + 1,x1);
|
||||||
d += 32;
|
d += 32;
|
||||||
s += 32;
|
s += 32;
|
||||||
}
|
}
|
||||||
if ((L & 31U) >= 16U) {
|
if ((L & 31U) >= 16U) {
|
||||||
copy<16>(d,s);
|
__m128i x0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(s));
|
||||||
|
_mm_storeu_si128(reinterpret_cast<__m128i *>(d),x0);
|
||||||
d += 16;
|
d += 16;
|
||||||
s += 16;
|
s += 16;
|
||||||
}
|
}
|
||||||
if ((L & 15U) >= 8U) {
|
if ((L & 15U) >= 8U) {
|
||||||
copy<8>(d,s);
|
*reinterpret_cast<uint64_t *>(d) = *reinterpret_cast<const uint64_t *>(s);
|
||||||
d += 8;
|
d += 8;
|
||||||
s += 8;
|
s += 8;
|
||||||
}
|
}
|
||||||
if ((L & 7U) >= 4U) {
|
if ((L & 7U) >= 4U) {
|
||||||
copy<4>(d,s);
|
*reinterpret_cast<uint32_t *>(d) = *reinterpret_cast<const uint32_t *>(s);
|
||||||
d += 4;
|
d += 4;
|
||||||
s += 4;
|
s += 4;
|
||||||
}
|
}
|
||||||
if ((L & 3U) >= 2U) {
|
if ((L & 3U) >= 2U) {
|
||||||
copy<2>(d,s);
|
*reinterpret_cast<uint16_t *>(d) = *reinterpret_cast<const uint16_t *>(s);
|
||||||
d += 2;
|
d += 2;
|
||||||
s += 2;
|
s += 2;
|
||||||
}
|
}
|
||||||
if ((L & 1U) != 0U) {
|
if ((L & 1U) != 0U) {
|
||||||
copy<1>(d,s);
|
*d = *s;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
for(unsigned int i=0;i<(L / (sizeof(uintptr_t) * 4));++i) {
|
||||||
|
uintptr_t x0 = reinterpret_cast<const uintptr_t *>(s)[0];
|
||||||
|
uintptr_t x1 = reinterpret_cast<const uintptr_t *>(s)[1];
|
||||||
|
uintptr_t x2 = reinterpret_cast<const uintptr_t *>(s)[2];
|
||||||
|
uintptr_t x3 = reinterpret_cast<const uintptr_t *>(s)[3];
|
||||||
|
reinterpret_cast<uintptr_t *>(d)[0] = x0;
|
||||||
|
reinterpret_cast<uintptr_t *>(d)[1] = x1;
|
||||||
|
reinterpret_cast<uintptr_t *>(d)[2] = x2;
|
||||||
|
reinterpret_cast<uintptr_t *>(d)[3] = x3;
|
||||||
|
s += (sizeof(uintptr_t) * 4);
|
||||||
|
d += (sizeof(uintptr_t) * 4);
|
||||||
}
|
}
|
||||||
static ZT_INLINE void copy(void *const dest,const void *const src,const unsigned int len) noexcept
|
for(unsigned int i=0;i<(L & ((sizeof(uintptr_t) * 4) - 1));++i)
|
||||||
|
d[i] = s[i];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy memory block whose size is known at run time
|
||||||
|
*
|
||||||
|
* @param dest Destination memory
|
||||||
|
* @param src Source memory
|
||||||
|
* @param len Bytes to copy
|
||||||
|
*/
|
||||||
|
static ZT_INLINE void copy(void *const dest,const void *const src,unsigned int len) noexcept
|
||||||
{
|
{
|
||||||
memcpy(dest,src,len);
|
memcpy(dest,src,len);
|
||||||
}
|
}
|
||||||
|
|
||||||
template<unsigned int L>
|
/**
|
||||||
static ZT_INLINE void zero(void *dest) noexcept;
|
* Zero memory block whose size is known at compile time
|
||||||
template<>
|
*
|
||||||
ZT_INLINE void zero<64>(void *const dest) noexcept
|
* @tparam L Size in bytes
|
||||||
{
|
* @param dest Memory to zero
|
||||||
#ifdef ZT_ARCH_X64
|
*/
|
||||||
const __m128i z = _mm_setzero_si128();
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest),z);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,z);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 2,z);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 3,z);
|
|
||||||
#else
|
|
||||||
const uint64_t z = 0;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[0] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[1] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[2] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[3] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[4] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[5] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[6] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[7] = z;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void zero<32>(void *const dest) noexcept
|
|
||||||
{
|
|
||||||
#ifdef ZT_ARCH_X64
|
|
||||||
const __m128i z = _mm_setzero_si128();
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest),z);
|
|
||||||
_mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,z);
|
|
||||||
#else
|
|
||||||
const uint64_t z = 0;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[0] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[1] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[2] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[3] = z;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void zero<16>(void *const dest) noexcept
|
|
||||||
{
|
|
||||||
const uint64_t z = 0;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[0] = z;
|
|
||||||
reinterpret_cast<uint64_t *>(dest)[1] = z;
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void zero<8>(void *const dest) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint64_t *>(dest) = 0;
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void zero<4>(void *const dest) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint32_t *>(dest) = 0;
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void zero<2>(void *const dest) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint16_t *>(dest) = 0;
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void zero<1>(void *const dest) noexcept
|
|
||||||
{
|
|
||||||
*reinterpret_cast<uint8_t *>(dest) = 0;
|
|
||||||
}
|
|
||||||
template<>
|
|
||||||
ZT_INLINE void zero<0>(void *const dest) noexcept
|
|
||||||
{
|
|
||||||
}
|
|
||||||
template<unsigned int L>
|
template<unsigned int L>
|
||||||
static ZT_INLINE void zero(void *const dest) noexcept
|
static ZT_INLINE void zero(void *const dest) noexcept
|
||||||
{
|
{
|
||||||
#ifdef ZT_NO_UNALIGNED_ACCESS
|
|
||||||
if ((((uintptr_t)dest | (uintptr_t)src) & 7U) != 0) {
|
|
||||||
memset(dest,0,L);
|
memset(dest,0,L);
|
||||||
return;
|
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
uint8_t *d = reinterpret_cast<uint8_t *>(dest);
|
/**
|
||||||
|
* Zero memory block whose size is known at run time
|
||||||
for(unsigned int i=0;i<(L / 64U);++i) {
|
*
|
||||||
zero<64>(d);
|
* @param dest Memory to zero
|
||||||
d += 64;
|
* @param len Size in bytes
|
||||||
}
|
*/
|
||||||
if ((L & 63U) >= 32U) {
|
|
||||||
zero<32>(d);
|
|
||||||
d += 32;
|
|
||||||
}
|
|
||||||
if ((L & 31U) >= 16U) {
|
|
||||||
zero<16>(d);
|
|
||||||
d += 16;
|
|
||||||
}
|
|
||||||
if ((L & 15U) >= 8U) {
|
|
||||||
zero<8>(d);
|
|
||||||
d += 8;
|
|
||||||
}
|
|
||||||
if ((L & 7U) >= 4U) {
|
|
||||||
zero<4>(d);
|
|
||||||
d += 4;
|
|
||||||
}
|
|
||||||
if ((L & 3U) >= 2U) {
|
|
||||||
zero<2>(d);
|
|
||||||
d += 2;
|
|
||||||
}
|
|
||||||
if ((L & 1U) != 0U) {
|
|
||||||
zero<1>(d);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
static ZT_INLINE void zero(void *const dest,const unsigned int len) noexcept
|
static ZT_INLINE void zero(void *const dest,const unsigned int len) noexcept
|
||||||
{
|
{
|
||||||
memset(dest,0,len);
|
memset(dest,0,len);
|
||||||
|
|
Loading…
Add table
Reference in a new issue