diff --git a/node/Buf.hpp b/node/Buf.hpp index af03db126..be83e376f 100644 --- a/node/Buf.hpp +++ b/node/Buf.hpp @@ -165,7 +165,7 @@ public: r.e = 0; break; } - memcpy(r.b->unsafeData + r.e,s->b->unsafeData + s->s,l); + Utils::copy(r.b->unsafeData + r.e,s->b->unsafeData + s->s,l); s->b.zero(); // let go of buffer in vector as soon as possible r.e += l; } @@ -182,14 +182,24 @@ public: /** * Create a new buffer and copy data into it */ - ZT_INLINE Buf(const void *const data,const unsigned int len) noexcept : __nextInPool(0),__refCount(0) { memcpy(unsafeData,data,len); } + ZT_INLINE Buf(const void *const data,const unsigned int len) noexcept : + __nextInPool(0), + __refCount(0) + { + Utils::copy(unsafeData,data,len); + } - ZT_INLINE Buf(const Buf &b2) noexcept : __nextInPool(0),__refCount(0) { memcpy(unsafeData,b2.unsafeData,ZT_BUF_MEM_SIZE); } + ZT_INLINE Buf(const Buf &b2) noexcept : + __nextInPool(0), + __refCount(0) + { + Utils::copy(unsafeData,b2.unsafeData); + } ZT_INLINE Buf &operator=(const Buf &b2) noexcept { if (this != &b2) - memcpy(unsafeData,b2.unsafeData,ZT_BUF_MEM_SIZE); + Utils::copy(unsafeData,b2.unsafeData); return *this; } @@ -219,12 +229,18 @@ public: /** * Set all memory to zero */ - ZT_INLINE void clear() noexcept { memset(unsafeData,0,ZT_BUF_MEM_SIZE); } + ZT_INLINE void clear() noexcept + { + Utils::zero(unsafeData); + } /** * Zero security critical data using Utils::burn() to ensure it's never optimized out. */ - ZT_INLINE void burn() noexcept { Utils::burn(unsafeData,ZT_BUF_MEM_SIZE); } + ZT_INLINE void burn() noexcept + { + Utils::burn(unsafeData,ZT_BUF_MEM_SIZE); + } /** * Read a byte @@ -347,7 +363,7 @@ public: const int sii = ii; while (ii < ZT_BUF_MEM_SIZE) { if (unsafeData[ii++] == 0) { - memcpy(buf,s,ii - sii); + Utils::copy(buf,s,ii - sii); return buf; } } @@ -391,7 +407,7 @@ public: ZT_INLINE uint8_t *rB(int &ii,void *const bytes,const unsigned int len) const noexcept { if ((ii += (int)len) <= ZT_BUF_MEM_SIZE) { - memcpy(bytes,unsafeData + ii,len); + Utils::copy(bytes,unsafeData + ii,len); return reinterpret_cast(bytes); } return nullptr; @@ -617,7 +633,7 @@ public: { const int s = ii; if ((ii += (int)len) <= ZT_BUF_MEM_SIZE) - memcpy(unsafeData + s,bytes,len); + Utils::copy(unsafeData + s,bytes,len); } /** diff --git a/node/C25519.cpp b/node/C25519.cpp index 48b8916dd..4f630a971 100644 --- a/node/C25519.cpp +++ b/node/C25519.cpp @@ -12,6 +12,7 @@ Derived from public domain code by D. J. Bernstein. #include "C25519.hpp" #include "SHA512.hpp" +#include "Utils.hpp" #ifdef __WINDOWS__ #pragma warning(disable: 4146) @@ -21,6 +22,8 @@ Derived from public domain code by D. J. Bernstein. #pragma GCC diagnostic ignored "-Wunused-function" #endif +using namespace ZeroTier; + namespace { // -------------------------------------------------------------------------------------------------------------------- @@ -32,158 +35,158 @@ typedef int32_t s32; typedef int64_t limb; ZT_INLINE void fsum(limb *output,const limb *in) { - unsigned i; - for (i = 0; i < 10; i += 2) { - output[0+i] = output[0+i] + in[0+i]; - output[1+i] = output[1+i] + in[1+i]; - } + unsigned i; + for (i = 0; i < 10; i += 2) { + output[0+i] = output[0+i] + in[0+i]; + output[1+i] = output[1+i] + in[1+i]; + } } ZT_INLINE void fdifference(limb *output,const limb *in) { - unsigned i; - for (i = 0; i < 10; ++i) { - output[i] = in[i] - output[i]; - } + unsigned i; + for (i = 0; i < 10; ++i) { + output[i] = in[i] - output[i]; + } } ZT_INLINE void fscalar_product(limb *output,const limb *in,const limb scalar) { - unsigned i; - for (i = 0; i < 10; ++i) { - output[i] = in[i] * scalar; - } + unsigned i; + for (i = 0; i < 10; ++i) { + output[i] = in[i] * scalar; + } } -ZT_INLINE void fproduct(limb *output,const limb *in2,const limb *in) { - output[0] = ((limb) ((s32) in2[0])) * ((s32) in[0]); - output[1] = ((limb) ((s32) in2[0])) * ((s32) in[1]) + - ((limb) ((s32) in2[1])) * ((s32) in[0]); - output[2] = 2 * ((limb) ((s32) in2[1])) * ((s32) in[1]) + - ((limb) ((s32) in2[0])) * ((s32) in[2]) + - ((limb) ((s32) in2[2])) * ((s32) in[0]); - output[3] = ((limb) ((s32) in2[1])) * ((s32) in[2]) + - ((limb) ((s32) in2[2])) * ((s32) in[1]) + - ((limb) ((s32) in2[0])) * ((s32) in[3]) + - ((limb) ((s32) in2[3])) * ((s32) in[0]); - output[4] = ((limb) ((s32) in2[2])) * ((s32) in[2]) + - 2 * (((limb) ((s32) in2[1])) * ((s32) in[3]) + - ((limb) ((s32) in2[3])) * ((s32) in[1])) + - ((limb) ((s32) in2[0])) * ((s32) in[4]) + - ((limb) ((s32) in2[4])) * ((s32) in[0]); - output[5] = ((limb) ((s32) in2[2])) * ((s32) in[3]) + - ((limb) ((s32) in2[3])) * ((s32) in[2]) + - ((limb) ((s32) in2[1])) * ((s32) in[4]) + - ((limb) ((s32) in2[4])) * ((s32) in[1]) + - ((limb) ((s32) in2[0])) * ((s32) in[5]) + - ((limb) ((s32) in2[5])) * ((s32) in[0]); - output[6] = 2 * (((limb) ((s32) in2[3])) * ((s32) in[3]) + - ((limb) ((s32) in2[1])) * ((s32) in[5]) + - ((limb) ((s32) in2[5])) * ((s32) in[1])) + - ((limb) ((s32) in2[2])) * ((s32) in[4]) + - ((limb) ((s32) in2[4])) * ((s32) in[2]) + - ((limb) ((s32) in2[0])) * ((s32) in[6]) + - ((limb) ((s32) in2[6])) * ((s32) in[0]); - output[7] = ((limb) ((s32) in2[3])) * ((s32) in[4]) + - ((limb) ((s32) in2[4])) * ((s32) in[3]) + - ((limb) ((s32) in2[2])) * ((s32) in[5]) + - ((limb) ((s32) in2[5])) * ((s32) in[2]) + - ((limb) ((s32) in2[1])) * ((s32) in[6]) + - ((limb) ((s32) in2[6])) * ((s32) in[1]) + - ((limb) ((s32) in2[0])) * ((s32) in[7]) + - ((limb) ((s32) in2[7])) * ((s32) in[0]); - output[8] = ((limb) ((s32) in2[4])) * ((s32) in[4]) + - 2 * (((limb) ((s32) in2[3])) * ((s32) in[5]) + - ((limb) ((s32) in2[5])) * ((s32) in[3]) + - ((limb) ((s32) in2[1])) * ((s32) in[7]) + - ((limb) ((s32) in2[7])) * ((s32) in[1])) + - ((limb) ((s32) in2[2])) * ((s32) in[6]) + - ((limb) ((s32) in2[6])) * ((s32) in[2]) + - ((limb) ((s32) in2[0])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[0]); - output[9] = ((limb) ((s32) in2[4])) * ((s32) in[5]) + - ((limb) ((s32) in2[5])) * ((s32) in[4]) + - ((limb) ((s32) in2[3])) * ((s32) in[6]) + - ((limb) ((s32) in2[6])) * ((s32) in[3]) + - ((limb) ((s32) in2[2])) * ((s32) in[7]) + - ((limb) ((s32) in2[7])) * ((s32) in[2]) + - ((limb) ((s32) in2[1])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[1]) + - ((limb) ((s32) in2[0])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[0]); - output[10] = 2 * (((limb) ((s32) in2[5])) * ((s32) in[5]) + - ((limb) ((s32) in2[3])) * ((s32) in[7]) + - ((limb) ((s32) in2[7])) * ((s32) in[3]) + - ((limb) ((s32) in2[1])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[1])) + - ((limb) ((s32) in2[4])) * ((s32) in[6]) + - ((limb) ((s32) in2[6])) * ((s32) in[4]) + - ((limb) ((s32) in2[2])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[2]); - output[11] = ((limb) ((s32) in2[5])) * ((s32) in[6]) + - ((limb) ((s32) in2[6])) * ((s32) in[5]) + - ((limb) ((s32) in2[4])) * ((s32) in[7]) + - ((limb) ((s32) in2[7])) * ((s32) in[4]) + - ((limb) ((s32) in2[3])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[3]) + - ((limb) ((s32) in2[2])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[2]); - output[12] = ((limb) ((s32) in2[6])) * ((s32) in[6]) + - 2 * (((limb) ((s32) in2[5])) * ((s32) in[7]) + - ((limb) ((s32) in2[7])) * ((s32) in[5]) + - ((limb) ((s32) in2[3])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[3])) + - ((limb) ((s32) in2[4])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[4]); - output[13] = ((limb) ((s32) in2[6])) * ((s32) in[7]) + - ((limb) ((s32) in2[7])) * ((s32) in[6]) + - ((limb) ((s32) in2[5])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[5]) + - ((limb) ((s32) in2[4])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[4]); - output[14] = 2 * (((limb) ((s32) in2[7])) * ((s32) in[7]) + - ((limb) ((s32) in2[5])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[5])) + - ((limb) ((s32) in2[6])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[6]); - output[15] = ((limb) ((s32) in2[7])) * ((s32) in[8]) + - ((limb) ((s32) in2[8])) * ((s32) in[7]) + - ((limb) ((s32) in2[6])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[6]); - output[16] = ((limb) ((s32) in2[8])) * ((s32) in[8]) + - 2 * (((limb) ((s32) in2[7])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[7])); - output[17] = ((limb) ((s32) in2[8])) * ((s32) in[9]) + - ((limb) ((s32) in2[9])) * ((s32) in[8]); - output[18] = 2 * ((limb) ((s32) in2[9])) * ((s32) in[9]); +void fproduct(limb *output,const limb *in2,const limb *in) { + output[0] = ((limb) ((s32) in2[0])) * ((s32) in[0]); + output[1] = ((limb) ((s32) in2[0])) * ((s32) in[1]) + + ((limb) ((s32) in2[1])) * ((s32) in[0]); + output[2] = 2 * ((limb) ((s32) in2[1])) * ((s32) in[1]) + + ((limb) ((s32) in2[0])) * ((s32) in[2]) + + ((limb) ((s32) in2[2])) * ((s32) in[0]); + output[3] = ((limb) ((s32) in2[1])) * ((s32) in[2]) + + ((limb) ((s32) in2[2])) * ((s32) in[1]) + + ((limb) ((s32) in2[0])) * ((s32) in[3]) + + ((limb) ((s32) in2[3])) * ((s32) in[0]); + output[4] = ((limb) ((s32) in2[2])) * ((s32) in[2]) + + 2 * (((limb) ((s32) in2[1])) * ((s32) in[3]) + + ((limb) ((s32) in2[3])) * ((s32) in[1])) + + ((limb) ((s32) in2[0])) * ((s32) in[4]) + + ((limb) ((s32) in2[4])) * ((s32) in[0]); + output[5] = ((limb) ((s32) in2[2])) * ((s32) in[3]) + + ((limb) ((s32) in2[3])) * ((s32) in[2]) + + ((limb) ((s32) in2[1])) * ((s32) in[4]) + + ((limb) ((s32) in2[4])) * ((s32) in[1]) + + ((limb) ((s32) in2[0])) * ((s32) in[5]) + + ((limb) ((s32) in2[5])) * ((s32) in[0]); + output[6] = 2 * (((limb) ((s32) in2[3])) * ((s32) in[3]) + + ((limb) ((s32) in2[1])) * ((s32) in[5]) + + ((limb) ((s32) in2[5])) * ((s32) in[1])) + + ((limb) ((s32) in2[2])) * ((s32) in[4]) + + ((limb) ((s32) in2[4])) * ((s32) in[2]) + + ((limb) ((s32) in2[0])) * ((s32) in[6]) + + ((limb) ((s32) in2[6])) * ((s32) in[0]); + output[7] = ((limb) ((s32) in2[3])) * ((s32) in[4]) + + ((limb) ((s32) in2[4])) * ((s32) in[3]) + + ((limb) ((s32) in2[2])) * ((s32) in[5]) + + ((limb) ((s32) in2[5])) * ((s32) in[2]) + + ((limb) ((s32) in2[1])) * ((s32) in[6]) + + ((limb) ((s32) in2[6])) * ((s32) in[1]) + + ((limb) ((s32) in2[0])) * ((s32) in[7]) + + ((limb) ((s32) in2[7])) * ((s32) in[0]); + output[8] = ((limb) ((s32) in2[4])) * ((s32) in[4]) + + 2 * (((limb) ((s32) in2[3])) * ((s32) in[5]) + + ((limb) ((s32) in2[5])) * ((s32) in[3]) + + ((limb) ((s32) in2[1])) * ((s32) in[7]) + + ((limb) ((s32) in2[7])) * ((s32) in[1])) + + ((limb) ((s32) in2[2])) * ((s32) in[6]) + + ((limb) ((s32) in2[6])) * ((s32) in[2]) + + ((limb) ((s32) in2[0])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[0]); + output[9] = ((limb) ((s32) in2[4])) * ((s32) in[5]) + + ((limb) ((s32) in2[5])) * ((s32) in[4]) + + ((limb) ((s32) in2[3])) * ((s32) in[6]) + + ((limb) ((s32) in2[6])) * ((s32) in[3]) + + ((limb) ((s32) in2[2])) * ((s32) in[7]) + + ((limb) ((s32) in2[7])) * ((s32) in[2]) + + ((limb) ((s32) in2[1])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[1]) + + ((limb) ((s32) in2[0])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[0]); + output[10] = 2 * (((limb) ((s32) in2[5])) * ((s32) in[5]) + + ((limb) ((s32) in2[3])) * ((s32) in[7]) + + ((limb) ((s32) in2[7])) * ((s32) in[3]) + + ((limb) ((s32) in2[1])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[1])) + + ((limb) ((s32) in2[4])) * ((s32) in[6]) + + ((limb) ((s32) in2[6])) * ((s32) in[4]) + + ((limb) ((s32) in2[2])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[2]); + output[11] = ((limb) ((s32) in2[5])) * ((s32) in[6]) + + ((limb) ((s32) in2[6])) * ((s32) in[5]) + + ((limb) ((s32) in2[4])) * ((s32) in[7]) + + ((limb) ((s32) in2[7])) * ((s32) in[4]) + + ((limb) ((s32) in2[3])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[3]) + + ((limb) ((s32) in2[2])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[2]); + output[12] = ((limb) ((s32) in2[6])) * ((s32) in[6]) + + 2 * (((limb) ((s32) in2[5])) * ((s32) in[7]) + + ((limb) ((s32) in2[7])) * ((s32) in[5]) + + ((limb) ((s32) in2[3])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[3])) + + ((limb) ((s32) in2[4])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[4]); + output[13] = ((limb) ((s32) in2[6])) * ((s32) in[7]) + + ((limb) ((s32) in2[7])) * ((s32) in[6]) + + ((limb) ((s32) in2[5])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[5]) + + ((limb) ((s32) in2[4])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[4]); + output[14] = 2 * (((limb) ((s32) in2[7])) * ((s32) in[7]) + + ((limb) ((s32) in2[5])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[5])) + + ((limb) ((s32) in2[6])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[6]); + output[15] = ((limb) ((s32) in2[7])) * ((s32) in[8]) + + ((limb) ((s32) in2[8])) * ((s32) in[7]) + + ((limb) ((s32) in2[6])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[6]); + output[16] = ((limb) ((s32) in2[8])) * ((s32) in[8]) + + 2 * (((limb) ((s32) in2[7])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[7])); + output[17] = ((limb) ((s32) in2[8])) * ((s32) in[9]) + + ((limb) ((s32) in2[9])) * ((s32) in[8]); + output[18] = 2 * ((limb) ((s32) in2[9])) * ((s32) in[9]); } void freduce_degree(limb *output) { - output[8] += output[18] << 4; - output[8] += output[18] << 1; - output[8] += output[18]; - output[7] += output[17] << 4; - output[7] += output[17] << 1; - output[7] += output[17]; - output[6] += output[16] << 4; - output[6] += output[16] << 1; - output[6] += output[16]; - output[5] += output[15] << 4; - output[5] += output[15] << 1; - output[5] += output[15]; - output[4] += output[14] << 4; - output[4] += output[14] << 1; - output[4] += output[14]; - output[3] += output[13] << 4; - output[3] += output[13] << 1; - output[3] += output[13]; - output[2] += output[12] << 4; - output[2] += output[12] << 1; - output[2] += output[12]; - output[1] += output[11] << 4; - output[1] += output[11] << 1; - output[1] += output[11]; - output[0] += output[10] << 4; - output[0] += output[10] << 1; - output[0] += output[10]; + output[8] += output[18] << 4; + output[8] += output[18] << 1; + output[8] += output[18]; + output[7] += output[17] << 4; + output[7] += output[17] << 1; + output[7] += output[17]; + output[6] += output[16] << 4; + output[6] += output[16] << 1; + output[6] += output[16]; + output[5] += output[15] << 4; + output[5] += output[15] << 1; + output[5] += output[15]; + output[4] += output[14] << 4; + output[4] += output[14] << 1; + output[4] += output[14]; + output[3] += output[13] << 4; + output[3] += output[13] << 1; + output[3] += output[13]; + output[2] += output[12] << 4; + output[2] += output[12] << 1; + output[2] += output[12]; + output[1] += output[11] << 4; + output[1] += output[11] << 1; + output[1] += output[11]; + output[0] += output[10] << 4; + output[0] += output[10] << 1; + output[0] += output[10]; } #if (-1 & 3) != 3 @@ -192,167 +195,167 @@ void freduce_degree(limb *output) { ZT_INLINE limb div_by_2_26(const limb v) { - /* High word of v; no shift needed. */ - const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); - /* Set to all 1s if v was negative; else set to 0s. */ - const int32_t sign = ((int32_t) highword) >> 31; - /* Set to 0x3ffffff if v was negative; else set to 0. */ - const int32_t roundoff = ((uint32_t) sign) >> 6; - /* Should return v / (1<<26) */ - return (v + roundoff) >> 26; + /* High word of v; no shift needed. */ + const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); + /* Set to all 1s if v was negative; else set to 0s. */ + const int32_t sign = ((int32_t) highword) >> 31; + /* Set to 0x3ffffff if v was negative; else set to 0. */ + const int32_t roundoff = ((uint32_t) sign) >> 6; + /* Should return v / (1<<26) */ + return (v + roundoff) >> 26; } ZT_INLINE limb div_by_2_25(const limb v) { - /* High word of v; no shift needed*/ - const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); - /* Set to all 1s if v was negative; else set to 0s. */ - const int32_t sign = ((int32_t) highword) >> 31; - /* Set to 0x1ffffff if v was negative; else set to 0. */ - const int32_t roundoff = ((uint32_t) sign) >> 7; - /* Should return v / (1<<25) */ - return (v + roundoff) >> 25; + /* High word of v; no shift needed*/ + const uint32_t highword = (uint32_t) (((uint64_t) v) >> 32); + /* Set to all 1s if v was negative; else set to 0s. */ + const int32_t sign = ((int32_t) highword) >> 31; + /* Set to 0x1ffffff if v was negative; else set to 0. */ + const int32_t roundoff = ((uint32_t) sign) >> 7; + /* Should return v / (1<<25) */ + return (v + roundoff) >> 25; } void freduce_coefficients(limb *output) { - unsigned i; + unsigned i; - output[10] = 0; + output[10] = 0; - for (i = 0; i < 10; i += 2) { - limb over = div_by_2_26(output[i]); - /* The entry condition (that |output[i]| < 280*2^54) means that over is, at - * most, 280*2^28 in the first iteration of this loop. This is added to the - * next limb and we can approximate the resulting bound of that limb by - * 281*2^54. */ - output[i] -= over << 26; - output[i+1] += over; + for (i = 0; i < 10; i += 2) { + limb over = div_by_2_26(output[i]); + /* The entry condition (that |output[i]| < 280*2^54) means that over is, at + * most, 280*2^28 in the first iteration of this loop. This is added to the + * next limb and we can approximate the resulting bound of that limb by + * 281*2^54. */ + output[i] -= over << 26; + output[i+1] += over; - /* For the first iteration, |output[i+1]| < 281*2^54, thus |over| < - * 281*2^29. When this is added to the next limb, the resulting bound can - * be approximated as 281*2^54. - * - * For subsequent iterations of the loop, 281*2^54 remains a conservative - * bound and no overflow occurs. */ - over = div_by_2_25(output[i+1]); - output[i+1] -= over << 25; - output[i+2] += over; - } - /* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */ - output[0] += output[10] << 4; - output[0] += output[10] << 1; - output[0] += output[10]; + /* For the first iteration, |output[i+1]| < 281*2^54, thus |over| < + * 281*2^29. When this is added to the next limb, the resulting bound can + * be approximated as 281*2^54. + * + * For subsequent iterations of the loop, 281*2^54 remains a conservative + * bound and no overflow occurs. */ + over = div_by_2_25(output[i+1]); + output[i+1] -= over << 25; + output[i+2] += over; + } + /* Now |output[10]| < 281*2^29 and all other coefficients are reduced. */ + output[0] += output[10] << 4; + output[0] += output[10] << 1; + output[0] += output[10]; - output[10] = 0; + output[10] = 0; - /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29 - * So |over| will be no more than 2^16. */ - { - limb over = div_by_2_26(output[0]); - output[0] -= over << 26; - output[1] += over; - } + /* Now output[1..9] are reduced, and |output[0]| < 2^26 + 19*281*2^29 + * So |over| will be no more than 2^16. */ + { + limb over = div_by_2_26(output[0]); + output[0] -= over << 26; + output[1] += over; + } - /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The - * bound on |output[1]| is sufficient to meet our needs. */ + /* Now output[0,2..9] are reduced, and |output[1]| < 2^25 + 2^16 < 2^26. The + * bound on |output[1]| is sufficient to meet our needs. */ } ZT_INLINE void fmul(limb *output,const limb *in,const limb *in2) { - limb t[19]; - fproduct(t, in, in2); - /* |t[i]| < 14*2^54 */ - freduce_degree(t); - freduce_coefficients(t); - /* |t[i]| < 2^26 */ - memcpy(output, t, sizeof(limb) * 10); + limb t[19]; + fproduct(t, in, in2); + /* |t[i]| < 14*2^54 */ + freduce_degree(t); + freduce_coefficients(t); + /* |t[i]| < 2^26 */ + Utils::copy(output,t); } void fsquare_inner(limb *output, const limb *in) { - output[0] = ((limb) ((s32) in[0])) * ((s32) in[0]); - output[1] = 2 * ((limb) ((s32) in[0])) * ((s32) in[1]); - output[2] = 2 * (((limb) ((s32) in[1])) * ((s32) in[1]) + - ((limb) ((s32) in[0])) * ((s32) in[2])); - output[3] = 2 * (((limb) ((s32) in[1])) * ((s32) in[2]) + - ((limb) ((s32) in[0])) * ((s32) in[3])); - output[4] = ((limb) ((s32) in[2])) * ((s32) in[2]) + - 4 * ((limb) ((s32) in[1])) * ((s32) in[3]) + - 2 * ((limb) ((s32) in[0])) * ((s32) in[4]); - output[5] = 2 * (((limb) ((s32) in[2])) * ((s32) in[3]) + - ((limb) ((s32) in[1])) * ((s32) in[4]) + - ((limb) ((s32) in[0])) * ((s32) in[5])); - output[6] = 2 * (((limb) ((s32) in[3])) * ((s32) in[3]) + - ((limb) ((s32) in[2])) * ((s32) in[4]) + - ((limb) ((s32) in[0])) * ((s32) in[6]) + - 2 * ((limb) ((s32) in[1])) * ((s32) in[5])); - output[7] = 2 * (((limb) ((s32) in[3])) * ((s32) in[4]) + - ((limb) ((s32) in[2])) * ((s32) in[5]) + - ((limb) ((s32) in[1])) * ((s32) in[6]) + - ((limb) ((s32) in[0])) * ((s32) in[7])); - output[8] = ((limb) ((s32) in[4])) * ((s32) in[4]) + - 2 * (((limb) ((s32) in[2])) * ((s32) in[6]) + - ((limb) ((s32) in[0])) * ((s32) in[8]) + - 2 * (((limb) ((s32) in[1])) * ((s32) in[7]) + - ((limb) ((s32) in[3])) * ((s32) in[5]))); - output[9] = 2 * (((limb) ((s32) in[4])) * ((s32) in[5]) + - ((limb) ((s32) in[3])) * ((s32) in[6]) + - ((limb) ((s32) in[2])) * ((s32) in[7]) + - ((limb) ((s32) in[1])) * ((s32) in[8]) + - ((limb) ((s32) in[0])) * ((s32) in[9])); - output[10] = 2 * (((limb) ((s32) in[5])) * ((s32) in[5]) + - ((limb) ((s32) in[4])) * ((s32) in[6]) + - ((limb) ((s32) in[2])) * ((s32) in[8]) + - 2 * (((limb) ((s32) in[3])) * ((s32) in[7]) + - ((limb) ((s32) in[1])) * ((s32) in[9]))); - output[11] = 2 * (((limb) ((s32) in[5])) * ((s32) in[6]) + - ((limb) ((s32) in[4])) * ((s32) in[7]) + - ((limb) ((s32) in[3])) * ((s32) in[8]) + - ((limb) ((s32) in[2])) * ((s32) in[9])); - output[12] = ((limb) ((s32) in[6])) * ((s32) in[6]) + - 2 * (((limb) ((s32) in[4])) * ((s32) in[8]) + - 2 * (((limb) ((s32) in[5])) * ((s32) in[7]) + - ((limb) ((s32) in[3])) * ((s32) in[9]))); - output[13] = 2 * (((limb) ((s32) in[6])) * ((s32) in[7]) + - ((limb) ((s32) in[5])) * ((s32) in[8]) + - ((limb) ((s32) in[4])) * ((s32) in[9])); - output[14] = 2 * (((limb) ((s32) in[7])) * ((s32) in[7]) + - ((limb) ((s32) in[6])) * ((s32) in[8]) + - 2 * ((limb) ((s32) in[5])) * ((s32) in[9])); - output[15] = 2 * (((limb) ((s32) in[7])) * ((s32) in[8]) + - ((limb) ((s32) in[6])) * ((s32) in[9])); - output[16] = ((limb) ((s32) in[8])) * ((s32) in[8]) + - 4 * ((limb) ((s32) in[7])) * ((s32) in[9]); - output[17] = 2 * ((limb) ((s32) in[8])) * ((s32) in[9]); - output[18] = 2 * ((limb) ((s32) in[9])) * ((s32) in[9]); + output[0] = ((limb) ((s32) in[0])) * ((s32) in[0]); + output[1] = 2 * ((limb) ((s32) in[0])) * ((s32) in[1]); + output[2] = 2 * (((limb) ((s32) in[1])) * ((s32) in[1]) + + ((limb) ((s32) in[0])) * ((s32) in[2])); + output[3] = 2 * (((limb) ((s32) in[1])) * ((s32) in[2]) + + ((limb) ((s32) in[0])) * ((s32) in[3])); + output[4] = ((limb) ((s32) in[2])) * ((s32) in[2]) + + 4 * ((limb) ((s32) in[1])) * ((s32) in[3]) + + 2 * ((limb) ((s32) in[0])) * ((s32) in[4]); + output[5] = 2 * (((limb) ((s32) in[2])) * ((s32) in[3]) + + ((limb) ((s32) in[1])) * ((s32) in[4]) + + ((limb) ((s32) in[0])) * ((s32) in[5])); + output[6] = 2 * (((limb) ((s32) in[3])) * ((s32) in[3]) + + ((limb) ((s32) in[2])) * ((s32) in[4]) + + ((limb) ((s32) in[0])) * ((s32) in[6]) + + 2 * ((limb) ((s32) in[1])) * ((s32) in[5])); + output[7] = 2 * (((limb) ((s32) in[3])) * ((s32) in[4]) + + ((limb) ((s32) in[2])) * ((s32) in[5]) + + ((limb) ((s32) in[1])) * ((s32) in[6]) + + ((limb) ((s32) in[0])) * ((s32) in[7])); + output[8] = ((limb) ((s32) in[4])) * ((s32) in[4]) + + 2 * (((limb) ((s32) in[2])) * ((s32) in[6]) + + ((limb) ((s32) in[0])) * ((s32) in[8]) + + 2 * (((limb) ((s32) in[1])) * ((s32) in[7]) + + ((limb) ((s32) in[3])) * ((s32) in[5]))); + output[9] = 2 * (((limb) ((s32) in[4])) * ((s32) in[5]) + + ((limb) ((s32) in[3])) * ((s32) in[6]) + + ((limb) ((s32) in[2])) * ((s32) in[7]) + + ((limb) ((s32) in[1])) * ((s32) in[8]) + + ((limb) ((s32) in[0])) * ((s32) in[9])); + output[10] = 2 * (((limb) ((s32) in[5])) * ((s32) in[5]) + + ((limb) ((s32) in[4])) * ((s32) in[6]) + + ((limb) ((s32) in[2])) * ((s32) in[8]) + + 2 * (((limb) ((s32) in[3])) * ((s32) in[7]) + + ((limb) ((s32) in[1])) * ((s32) in[9]))); + output[11] = 2 * (((limb) ((s32) in[5])) * ((s32) in[6]) + + ((limb) ((s32) in[4])) * ((s32) in[7]) + + ((limb) ((s32) in[3])) * ((s32) in[8]) + + ((limb) ((s32) in[2])) * ((s32) in[9])); + output[12] = ((limb) ((s32) in[6])) * ((s32) in[6]) + + 2 * (((limb) ((s32) in[4])) * ((s32) in[8]) + + 2 * (((limb) ((s32) in[5])) * ((s32) in[7]) + + ((limb) ((s32) in[3])) * ((s32) in[9]))); + output[13] = 2 * (((limb) ((s32) in[6])) * ((s32) in[7]) + + ((limb) ((s32) in[5])) * ((s32) in[8]) + + ((limb) ((s32) in[4])) * ((s32) in[9])); + output[14] = 2 * (((limb) ((s32) in[7])) * ((s32) in[7]) + + ((limb) ((s32) in[6])) * ((s32) in[8]) + + 2 * ((limb) ((s32) in[5])) * ((s32) in[9])); + output[15] = 2 * (((limb) ((s32) in[7])) * ((s32) in[8]) + + ((limb) ((s32) in[6])) * ((s32) in[9])); + output[16] = ((limb) ((s32) in[8])) * ((s32) in[8]) + + 4 * ((limb) ((s32) in[7])) * ((s32) in[9]); + output[17] = 2 * ((limb) ((s32) in[8])) * ((s32) in[9]); + output[18] = 2 * ((limb) ((s32) in[9])) * ((s32) in[9]); } ZT_INLINE void fsquare(limb *output,const limb *in) { - limb t[19]; - fsquare_inner(t, in); - /* |t[i]| < 14*2^54 because the largest product of two limbs will be < - * 2^(27+27) and fsquare_inner adds together, at most, 14 of those - * products. */ - freduce_degree(t); - freduce_coefficients(t); - /* |t[i]| < 2^26 */ - memcpy(output, t, sizeof(limb) * 10); + limb t[19]; + fsquare_inner(t, in); + /* |t[i]| < 14*2^54 because the largest product of two limbs will be < + * 2^(27+27) and fsquare_inner adds together, at most, 14 of those + * products. */ + freduce_degree(t); + freduce_coefficients(t); + /* |t[i]| < 2^26 */ + Utils::copy(output,t); } ZT_INLINE void fexpand(limb *output,const u8 *input) { #define F(n,start,shift,mask) \ - output[n] = ((((limb) input[start + 0]) | \ - ((limb) input[start + 1]) << 8 | \ - ((limb) input[start + 2]) << 16 | \ - ((limb) input[start + 3]) << 24) >> shift) & mask; - F(0, 0, 0, 0x3ffffff); - F(1, 3, 2, 0x1ffffff); - F(2, 6, 3, 0x3ffffff); - F(3, 9, 5, 0x1ffffff); - F(4, 12, 6, 0x3ffffff); - F(5, 16, 0, 0x1ffffff); - F(6, 19, 1, 0x3ffffff); - F(7, 22, 3, 0x1ffffff); - F(8, 25, 4, 0x3ffffff); - F(9, 28, 6, 0x1ffffff); + output[n] = ((((limb) input[start + 0]) | \ + ((limb) input[start + 1]) << 8 | \ + ((limb) input[start + 2]) << 16 | \ + ((limb) input[start + 3]) << 24) >> shift) & mask; + F(0, 0, 0, 0x3ffffff); + F(1, 3, 2, 0x1ffffff); + F(2, 6, 3, 0x3ffffff); + F(3, 9, 5, 0x1ffffff); + F(4, 12, 6, 0x3ffffff); + F(5, 16, 0, 0x1ffffff); + F(6, 19, 1, 0x3ffffff); + F(7, 22, 3, 0x1ffffff); + F(8, 25, 4, 0x3ffffff); + F(9, 28, 6, 0x1ffffff); #undef F } @@ -361,333 +364,333 @@ ZT_INLINE void fexpand(limb *output,const u8 *input) { #endif ZT_INLINE s32 s32_eq(s32 a,s32 b) { - a = ~(a ^ b); - a &= a << 16; - a &= a << 8; - a &= a << 4; - a &= a << 2; - a &= a << 1; - return a >> 31; + a = ~(a ^ b); + a &= a << 16; + a &= a << 8; + a &= a << 4; + a &= a << 2; + a &= a << 1; + return a >> 31; } ZT_INLINE s32 s32_gte(s32 a,s32 b) { - a -= b; - /* a >= 0 iff a >= b. */ - return ~(a >> 31); + a -= b; + /* a >= 0 iff a >= b. */ + return ~(a >> 31); } -ZT_INLINE void fcontract(u8 *output,limb *input_limbs) { - int i; - int j; - s32 input[10]; - s32 mask; +void fcontract(u8 *output,limb *input_limbs) { + int i; + int j; + s32 input[10]; + s32 mask; - for (i = 0; i < 10; i++) { - input[i] = input_limbs[i]; - } - for (j = 0; j < 2; ++j) { - for (i = 0; i < 9; ++i) { - if ((i & 1) == 1) { - const s32 mm = input[i] >> 31; - const s32 carry = -((input[i] & mm) >> 25); - input[i] = input[i] + (carry << 25); - input[i+1] = input[i+1] - carry; - } else { - const s32 mm = input[i] >> 31; - const s32 carry = -((input[i] & mm) >> 26); - input[i] = input[i] + (carry << 26); - input[i+1] = input[i+1] - carry; - } - } - { - const s32 mm = input[9] >> 31; - const s32 carry = -((input[9] & mm) >> 25); - input[9] = input[9] + (carry << 25); - input[0] = input[0] - (carry * 19); - } - } - { - const s32 mm = input[0] >> 31; - const s32 carry = -((input[0] & mm) >> 26); - input[0] = input[0] + (carry << 26); - input[1] = input[1] - carry; - } - for (j = 0; j < 2; j++) { - for (i = 0; i < 9; i++) { - if ((i & 1) == 1) { - const s32 carry = input[i] >> 25; - input[i] &= 0x1ffffff; - input[i+1] += carry; - } else { - const s32 carry = input[i] >> 26; - input[i] &= 0x3ffffff; - input[i+1] += carry; - } - } - { - const s32 carry = input[9] >> 25; - input[9] &= 0x1ffffff; - input[0] += 19*carry; - } - } - mask = s32_gte(input[0], 0x3ffffed); - for (i = 1; i < 10; i++) { - if ((i & 1) == 1) { - mask &= s32_eq(input[i], 0x1ffffff); - } else { - mask &= s32_eq(input[i], 0x3ffffff); - } - } - input[0] -= mask & 0x3ffffed; - for (i = 1; i < 10; i++) { - if ((i & 1) == 1) { - input[i] -= mask & 0x1ffffff; - } else { - input[i] -= mask & 0x3ffffff; - } - } + for (i = 0; i < 10; i++) { + input[i] = input_limbs[i]; + } + for (j = 0; j < 2; ++j) { + for (i = 0; i < 9; ++i) { + if ((i & 1) == 1) { + const s32 mm = input[i] >> 31; + const s32 carry = -((input[i] & mm) >> 25); + input[i] = input[i] + (carry << 25); + input[i+1] = input[i+1] - carry; + } else { + const s32 mm = input[i] >> 31; + const s32 carry = -((input[i] & mm) >> 26); + input[i] = input[i] + (carry << 26); + input[i+1] = input[i+1] - carry; + } + } + { + const s32 mm = input[9] >> 31; + const s32 carry = -((input[9] & mm) >> 25); + input[9] = input[9] + (carry << 25); + input[0] = input[0] - (carry * 19); + } + } + { + const s32 mm = input[0] >> 31; + const s32 carry = -((input[0] & mm) >> 26); + input[0] = input[0] + (carry << 26); + input[1] = input[1] - carry; + } + for (j = 0; j < 2; j++) { + for (i = 0; i < 9; i++) { + if ((i & 1) == 1) { + const s32 carry = input[i] >> 25; + input[i] &= 0x1ffffff; + input[i+1] += carry; + } else { + const s32 carry = input[i] >> 26; + input[i] &= 0x3ffffff; + input[i+1] += carry; + } + } + { + const s32 carry = input[9] >> 25; + input[9] &= 0x1ffffff; + input[0] += 19*carry; + } + } + mask = s32_gte(input[0], 0x3ffffed); + for (i = 1; i < 10; i++) { + if ((i & 1) == 1) { + mask &= s32_eq(input[i], 0x1ffffff); + } else { + mask &= s32_eq(input[i], 0x3ffffff); + } + } + input[0] -= mask & 0x3ffffed; + for (i = 1; i < 10; i++) { + if ((i & 1) == 1) { + input[i] -= mask & 0x1ffffff; + } else { + input[i] -= mask & 0x3ffffff; + } + } - input[1] <<= 2; - input[2] <<= 3; - input[3] <<= 5; - input[4] <<= 6; - input[6] <<= 1; - input[7] <<= 3; - input[8] <<= 4; - input[9] <<= 6; + input[1] <<= 2; + input[2] <<= 3; + input[3] <<= 5; + input[4] <<= 6; + input[6] <<= 1; + input[7] <<= 3; + input[8] <<= 4; + input[9] <<= 6; #define F(i, s) \ - output[s+0] |= input[i] & 0xff; \ - output[s+1] = (input[i] >> 8) & 0xff; \ - output[s+2] = (input[i] >> 16) & 0xff; \ - output[s+3] = (input[i] >> 24) & 0xff; - output[0] = 0; - output[16] = 0; - F(0,0); - F(1,3); - F(2,6); - F(3,9); - F(4,12); - F(5,16); - F(6,19); - F(7,22); - F(8,25); - F(9,28); + output[s+0] |= input[i] & 0xff; \ + output[s+1] = (input[i] >> 8) & 0xff; \ + output[s+2] = (input[i] >> 16) & 0xff; \ + output[s+3] = (input[i] >> 24) & 0xff; + output[0] = 0; + output[16] = 0; + F(0,0); + F(1,3); + F(2,6); + F(3,9); + F(4,12); + F(5,16); + F(6,19); + F(7,22); + F(8,25); + F(9,28); #undef F } -ZT_INLINE void fmonty(limb *x2,limb *z2, /* output 2Q */ - limb *x3,limb *z3, /* output Q + Q' */ - limb *x,limb *z, /* input Q */ - limb *xprime,limb *zprime, /* input Q' */ - const limb *qmqp /* input Q - Q' */) { - limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19], - zzprime[19], zzzprime[19], xxxprime[19]; +void fmonty(limb *x2,limb *z2, /* output 2Q */ + limb *x3,limb *z3, /* output Q + Q' */ + limb *x,limb *z, /* input Q */ + limb *xprime,limb *zprime, /* input Q' */ + const limb *qmqp /* input Q - Q' */) { + limb origx[10], origxprime[10], zzz[19], xx[19], zz[19], xxprime[19], + zzprime[19], zzzprime[19], xxxprime[19]; - memcpy(origx, x, 10 * sizeof(limb)); - fsum(x, z); - /* |x[i]| < 2^27 */ - fdifference(z, origx); /* does x - z */ - /* |z[i]| < 2^27 */ + Utils::copy<10 * sizeof(limb)>(origx,x); + fsum(x, z); + /* |x[i]| < 2^27 */ + fdifference(z, origx); /* does x - z */ + /* |z[i]| < 2^27 */ - memcpy(origxprime, xprime, sizeof(limb) * 10); - fsum(xprime, zprime); - /* |xprime[i]| < 2^27 */ - fdifference(zprime, origxprime); - /* |zprime[i]| < 2^27 */ - fproduct(xxprime, xprime, z); - /* |xxprime[i]| < 14*2^54: the largest product of two limbs will be < - * 2^(27+27) and fproduct adds together, at most, 14 of those products. - * (Approximating that to 2^58 doesn't work out.) */ - fproduct(zzprime, x, zprime); - /* |zzprime[i]| < 14*2^54 */ - freduce_degree(xxprime); - freduce_coefficients(xxprime); - /* |xxprime[i]| < 2^26 */ - freduce_degree(zzprime); - freduce_coefficients(zzprime); - /* |zzprime[i]| < 2^26 */ - memcpy(origxprime, xxprime, sizeof(limb) * 10); - fsum(xxprime, zzprime); - /* |xxprime[i]| < 2^27 */ - fdifference(zzprime, origxprime); - /* |zzprime[i]| < 2^27 */ - fsquare(xxxprime, xxprime); - /* |xxxprime[i]| < 2^26 */ - fsquare(zzzprime, zzprime); - /* |zzzprime[i]| < 2^26 */ - fproduct(zzprime, zzzprime, qmqp); - /* |zzprime[i]| < 14*2^52 */ - freduce_degree(zzprime); - freduce_coefficients(zzprime); - /* |zzprime[i]| < 2^26 */ - memcpy(x3, xxxprime, sizeof(limb) * 10); - memcpy(z3, zzprime, sizeof(limb) * 10); + Utils::copy(origxprime, xprime); + fsum(xprime, zprime); + /* |xprime[i]| < 2^27 */ + fdifference(zprime, origxprime); + /* |zprime[i]| < 2^27 */ + fproduct(xxprime, xprime, z); + /* |xxprime[i]| < 14*2^54: the largest product of two limbs will be < + * 2^(27+27) and fproduct adds together, at most, 14 of those products. + * (Approximating that to 2^58 doesn't work out.) */ + fproduct(zzprime, x, zprime); + /* |zzprime[i]| < 14*2^54 */ + freduce_degree(xxprime); + freduce_coefficients(xxprime); + /* |xxprime[i]| < 2^26 */ + freduce_degree(zzprime); + freduce_coefficients(zzprime); + /* |zzprime[i]| < 2^26 */ + Utils::copy(origxprime,xxprime); + fsum(xxprime, zzprime); + /* |xxprime[i]| < 2^27 */ + fdifference(zzprime, origxprime); + /* |zzprime[i]| < 2^27 */ + fsquare(xxxprime, xxprime); + /* |xxxprime[i]| < 2^26 */ + fsquare(zzzprime, zzprime); + /* |zzzprime[i]| < 2^26 */ + fproduct(zzprime, zzzprime, qmqp); + /* |zzprime[i]| < 14*2^52 */ + freduce_degree(zzprime); + freduce_coefficients(zzprime); + /* |zzprime[i]| < 2^26 */ + Utils::copy(x3,xxxprime); + Utils::copy(z3,zzprime); - fsquare(xx, x); - /* |xx[i]| < 2^26 */ - fsquare(zz, z); - /* |zz[i]| < 2^26 */ - fproduct(x2, xx, zz); - /* |x2[i]| < 14*2^52 */ - freduce_degree(x2); - freduce_coefficients(x2); - /* |x2[i]| < 2^26 */ - fdifference(zz, xx); // does zz = xx - zz - /* |zz[i]| < 2^27 */ - memset(zzz + 10, 0, sizeof(limb) * 9); - fscalar_product(zzz, zz, 121665); - /* |zzz[i]| < 2^(27+17) */ - /* No need to call freduce_degree here: - fscalar_product doesn't increase the degree of its input. */ - freduce_coefficients(zzz); - /* |zzz[i]| < 2^26 */ - fsum(zzz, xx); - /* |zzz[i]| < 2^27 */ - fproduct(z2, zz, zzz); - /* |z2[i]| < 14*2^(26+27) */ - freduce_degree(z2); - freduce_coefficients(z2); - /* |z2|i| < 2^26 */ + fsquare(xx, x); + /* |xx[i]| < 2^26 */ + fsquare(zz, z); + /* |zz[i]| < 2^26 */ + fproduct(x2, xx, zz); + /* |x2[i]| < 14*2^52 */ + freduce_degree(x2); + freduce_coefficients(x2); + /* |x2[i]| < 2^26 */ + fdifference(zz, xx); // does zz = xx - zz + /* |zz[i]| < 2^27 */ + Utils::zero(zzz + 10); + fscalar_product(zzz, zz, 121665); + /* |zzz[i]| < 2^(27+17) */ + /* No need to call freduce_degree here: + fscalar_product doesn't increase the degree of its input. */ + freduce_coefficients(zzz); + /* |zzz[i]| < 2^26 */ + fsum(zzz, xx); + /* |zzz[i]| < 2^27 */ + fproduct(z2, zz, zzz); + /* |z2[i]| < 14*2^(26+27) */ + freduce_degree(z2); + freduce_coefficients(z2); + /* |z2|i| < 2^26 */ } ZT_INLINE void swap_conditional(limb a[19],limb b[19],limb iswap) { - unsigned i; - const s32 swap = (s32) -iswap; + unsigned i; + const s32 swap = (s32) -iswap; - for (i = 0; i < 10; ++i) { - const s32 x = swap & ( ((s32)a[i]) ^ ((s32)b[i]) ); - a[i] = ((s32)a[i]) ^ x; - b[i] = ((s32)b[i]) ^ x; - } + for (i = 0; i < 10; ++i) { + const s32 x = swap & ( ((s32)a[i]) ^ ((s32)b[i]) ); + a[i] = ((s32)a[i]) ^ x; + b[i] = ((s32)b[i]) ^ x; + } } -ZT_INLINE void cmult(limb *resultx,limb *resultz,const u8 *n,const limb *q) { - limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0}; - limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; - limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1}; - limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; +void cmult(limb *resultx,limb *resultz,const u8 *n,const limb *q) { + limb a[19] = {0}, b[19] = {1}, c[19] = {1}, d[19] = {0}; + limb *nqpqx = a, *nqpqz = b, *nqx = c, *nqz = d, *t; + limb e[19] = {0}, f[19] = {1}, g[19] = {0}, h[19] = {1}; + limb *nqpqx2 = e, *nqpqz2 = f, *nqx2 = g, *nqz2 = h; - unsigned i, j; + unsigned i, j; - memcpy(nqpqx, q, sizeof(limb) * 10); + Utils::copy(nqpqx,q); - for (i = 0; i < 32; ++i) { - u8 byte = n[31 - i]; - for (j = 0; j < 8; ++j) { - const limb bit = byte >> 7; + for (i = 0; i < 32; ++i) { + u8 byte = n[31 - i]; + for (j = 0; j < 8; ++j) { + const limb bit = byte >> 7; - swap_conditional(nqx, nqpqx, bit); - swap_conditional(nqz, nqpqz, bit); - fmonty(nqx2, nqz2, - nqpqx2, nqpqz2, - nqx, nqz, - nqpqx, nqpqz, - q); - swap_conditional(nqx2, nqpqx2, bit); - swap_conditional(nqz2, nqpqz2, bit); + swap_conditional(nqx, nqpqx, bit); + swap_conditional(nqz, nqpqz, bit); + fmonty(nqx2, nqz2, + nqpqx2, nqpqz2, + nqx, nqz, + nqpqx, nqpqz, + q); + swap_conditional(nqx2, nqpqx2, bit); + swap_conditional(nqz2, nqpqz2, bit); - t = nqx; - nqx = nqx2; - nqx2 = t; - t = nqz; - nqz = nqz2; - nqz2 = t; - t = nqpqx; - nqpqx = nqpqx2; - nqpqx2 = t; - t = nqpqz; - nqpqz = nqpqz2; - nqpqz2 = t; + t = nqx; + nqx = nqx2; + nqx2 = t; + t = nqz; + nqz = nqz2; + nqz2 = t; + t = nqpqx; + nqpqx = nqpqx2; + nqpqx2 = t; + t = nqpqz; + nqpqz = nqpqz2; + nqpqz2 = t; - byte <<= 1; - } - } + byte <<= 1; + } + } - memcpy(resultx, nqx, sizeof(limb) * 10); - memcpy(resultz, nqz, sizeof(limb) * 10); + Utils::copy(resultx,nqx); + Utils::copy(resultz,nqz); } ZT_INLINE void crecip(limb *out,const limb *z) { - limb z2[10]; - limb z9[10]; - limb z11[10]; - limb z2_5_0[10]; - limb z2_10_0[10]; - limb z2_20_0[10]; - limb z2_50_0[10]; - limb z2_100_0[10]; - limb t0[10]; - limb t1[10]; - int i; + limb z2[10]; + limb z9[10]; + limb z11[10]; + limb z2_5_0[10]; + limb z2_10_0[10]; + limb z2_20_0[10]; + limb z2_50_0[10]; + limb z2_100_0[10]; + limb t0[10]; + limb t1[10]; + int i; - /* 2 */ fsquare(z2,z); - /* 4 */ fsquare(t1,z2); - /* 8 */ fsquare(t0,t1); - /* 9 */ fmul(z9,t0,z); - /* 11 */ fmul(z11,z9,z2); - /* 22 */ fsquare(t0,z11); - /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9); + /* 2 */ fsquare(z2,z); + /* 4 */ fsquare(t1,z2); + /* 8 */ fsquare(t0,t1); + /* 9 */ fmul(z9,t0,z); + /* 11 */ fmul(z11,z9,z2); + /* 22 */ fsquare(t0,z11); + /* 2^5 - 2^0 = 31 */ fmul(z2_5_0,t0,z9); - /* 2^6 - 2^1 */ fsquare(t0,z2_5_0); - /* 2^7 - 2^2 */ fsquare(t1,t0); - /* 2^8 - 2^3 */ fsquare(t0,t1); - /* 2^9 - 2^4 */ fsquare(t1,t0); - /* 2^10 - 2^5 */ fsquare(t0,t1); - /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0); + /* 2^6 - 2^1 */ fsquare(t0,z2_5_0); + /* 2^7 - 2^2 */ fsquare(t1,t0); + /* 2^8 - 2^3 */ fsquare(t0,t1); + /* 2^9 - 2^4 */ fsquare(t1,t0); + /* 2^10 - 2^5 */ fsquare(t0,t1); + /* 2^10 - 2^0 */ fmul(z2_10_0,t0,z2_5_0); - /* 2^11 - 2^1 */ fsquare(t0,z2_10_0); - /* 2^12 - 2^2 */ fsquare(t1,t0); - /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } - /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0); + /* 2^11 - 2^1 */ fsquare(t0,z2_10_0); + /* 2^12 - 2^2 */ fsquare(t1,t0); + /* 2^20 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^20 - 2^0 */ fmul(z2_20_0,t1,z2_10_0); - /* 2^21 - 2^1 */ fsquare(t0,z2_20_0); - /* 2^22 - 2^2 */ fsquare(t1,t0); - /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } - /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0); + /* 2^21 - 2^1 */ fsquare(t0,z2_20_0); + /* 2^22 - 2^2 */ fsquare(t1,t0); + /* 2^40 - 2^20 */ for (i = 2;i < 20;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^40 - 2^0 */ fmul(t0,t1,z2_20_0); - /* 2^41 - 2^1 */ fsquare(t1,t0); - /* 2^42 - 2^2 */ fsquare(t0,t1); - /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); } - /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0); + /* 2^41 - 2^1 */ fsquare(t1,t0); + /* 2^42 - 2^2 */ fsquare(t0,t1); + /* 2^50 - 2^10 */ for (i = 2;i < 10;i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^50 - 2^0 */ fmul(z2_50_0,t0,z2_10_0); - /* 2^51 - 2^1 */ fsquare(t0,z2_50_0); - /* 2^52 - 2^2 */ fsquare(t1,t0); - /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } - /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0); + /* 2^51 - 2^1 */ fsquare(t0,z2_50_0); + /* 2^52 - 2^2 */ fsquare(t1,t0); + /* 2^100 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^100 - 2^0 */ fmul(z2_100_0,t1,z2_50_0); - /* 2^101 - 2^1 */ fsquare(t1,z2_100_0); - /* 2^102 - 2^2 */ fsquare(t0,t1); - /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); } - /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0); + /* 2^101 - 2^1 */ fsquare(t1,z2_100_0); + /* 2^102 - 2^2 */ fsquare(t0,t1); + /* 2^200 - 2^100 */ for (i = 2;i < 100;i += 2) { fsquare(t1,t0); fsquare(t0,t1); } + /* 2^200 - 2^0 */ fmul(t1,t0,z2_100_0); - /* 2^201 - 2^1 */ fsquare(t0,t1); - /* 2^202 - 2^2 */ fsquare(t1,t0); - /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } - /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0); + /* 2^201 - 2^1 */ fsquare(t0,t1); + /* 2^202 - 2^2 */ fsquare(t1,t0); + /* 2^250 - 2^50 */ for (i = 2;i < 50;i += 2) { fsquare(t0,t1); fsquare(t1,t0); } + /* 2^250 - 2^0 */ fmul(t0,t1,z2_50_0); - /* 2^251 - 2^1 */ fsquare(t1,t0); - /* 2^252 - 2^2 */ fsquare(t0,t1); - /* 2^253 - 2^3 */ fsquare(t1,t0); - /* 2^254 - 2^4 */ fsquare(t0,t1); - /* 2^255 - 2^5 */ fsquare(t1,t0); - /* 2^255 - 21 */ fmul(out,t1,z11); + /* 2^251 - 2^1 */ fsquare(t1,t0); + /* 2^252 - 2^2 */ fsquare(t0,t1); + /* 2^253 - 2^3 */ fsquare(t1,t0); + /* 2^254 - 2^4 */ fsquare(t0,t1); + /* 2^255 - 2^5 */ fsquare(t1,t0); + /* 2^255 - 21 */ fmul(out,t1,z11); } void crypto_scalarmult(u8 *mypublic, const u8 *secret, const u8 *basepoint) { - limb bp[10], x[10], z[11], zmone[10]; + limb bp[10], x[10], z[11], zmone[10]; uint8_t e[32]; - int i; + int i; - for (i = 0; i < 32; ++i) e[i] = secret[i]; - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; + for (i = 0; i < 32; ++i) e[i] = secret[i]; + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; - fexpand(bp, basepoint); - cmult(x, z, e, bp); - crecip(zmone, z); - fmul(z, x, zmone); - fcontract(mypublic, z); + fexpand(bp, basepoint); + cmult(x, z, e, bp); + crecip(zmone, z); + fmul(z, x, zmone); + fcontract(mypublic, z); } static const unsigned char base[32] = {9}; @@ -768,7 +771,7 @@ ZT_INLINE crypto_uint32 ge(crypto_uint32 a,crypto_uint32 b) /* 16-bit inputs */ ZT_INLINE crypto_uint32 times19(crypto_uint32 a) { return (a << 4) + (a << 1) + a; } ZT_INLINE crypto_uint32 times38(crypto_uint32 a) { return (a << 5) + (a << 2) + (a << 1); } -ZT_INLINE void reduce_add_sub(fe25519 *r) +void reduce_add_sub(fe25519 *r) { int i,rep; for(rep=0;rep<4;rep++) @@ -804,7 +807,7 @@ ZT_INLINE void reduce_mul(fe25519 *r) } } -ZT_INLINE void fe25519_freeze(fe25519 *r) +void fe25519_freeze(fe25519 *r) { int i; crypto_uint32 mm = equal(r->v[31],127); @@ -836,7 +839,7 @@ ZT_INLINE void fe25519_pack(unsigned char r[32],const fe25519 *x) r[i] = y.v[i]; } -int fe25519_iseq_vartime(const fe25519 *x, const fe25519 *y) +ZT_INLINE int fe25519_iseq_vartime(const fe25519 *x, const fe25519 *y) { int i; fe25519 t1 = *x; @@ -885,7 +888,7 @@ void fe25519_neg(fe25519 *r, const fe25519 *x) fe25519_sub(r, r, &t); } -void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y) +ZT_INLINE void fe25519_add(fe25519 *r, const fe25519 *x, const fe25519 *y) { int i; for(i=0;i<32;i++) r->v[i] = x->v[i] + y->v[i]; @@ -922,7 +925,7 @@ void fe25519_mul(fe25519 *r, const fe25519 *x, const fe25519 *y) ZT_INLINE void fe25519_square(fe25519 *r,const fe25519 *x) { fe25519_mul(r,x,x); } -void fe25519_invert(fe25519 *r, const fe25519 *x) +ZT_INLINE void fe25519_invert(fe25519 *r, const fe25519 *x) { fe25519 z2; fe25519 z9; @@ -989,7 +992,7 @@ void fe25519_invert(fe25519 *r, const fe25519 *x) /* 2^255 - 21 */ fe25519_mul(r,&t1,&z11); } -void fe25519_pow2523(fe25519 *r, const fe25519 *x) +ZT_INLINE void fe25519_pow2523(fe25519 *r, const fe25519 *x) { fe25519 z2; fe25519 z9; @@ -1084,8 +1087,10 @@ void barrett_reduce(sc25519 *r, const crypto_uint32 x[64]) crypto_uint32 r2[33]; crypto_uint32 pb = 0; - for (i = 0;i < 66;++i) q2[i] = 0; - for (i = 0;i < 33;++i) r2[i] = 0; + Utils::zero(q2); + //for (i = 0;i < 66;++i) q2[i] = 0; + Utils::zero(r2); + //for (i = 0;i < 33;++i) r2[i] = 0; for(i=0;i<33;i++) for(j=0;j<33;j++) @@ -1151,7 +1156,7 @@ ZT_INLINE void sc25519_add(sc25519 *r,const sc25519 *x,const sc25519 *y) reduce_add_sub(r); } -void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y) +ZT_INLINE void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y) { int i,j; crypto_uint32 t[64]; @@ -1170,7 +1175,7 @@ void sc25519_mul(sc25519 *r, const sc25519 *x, const sc25519 *y) barrett_reduce(r, t); } -void sc25519_window3(signed char r[85], const sc25519 *s) +ZT_INLINE void sc25519_window3(signed char r[85], const sc25519 *s) { char carry; int i; @@ -2225,7 +2230,7 @@ ZT_INLINE void setneutral(ge25519 *r) } /* return 0 on success, -1 otherwise */ -int ge25519_unpackneg_vartime(ge25519_p3 *r, const unsigned char p[32]) +ZT_INLINE int ge25519_unpackneg_vartime(ge25519_p3 *r, const unsigned char p[32]) { unsigned char par; fe25519 t, chk, num, den, den2, den4, den6; @@ -2272,7 +2277,7 @@ int ge25519_unpackneg_vartime(ge25519_p3 *r, const unsigned char p[32]) return 0; } -ZT_INLINE void ge25519_pack(unsigned char r[32],const ge25519_p3 *p) +void ge25519_pack(unsigned char r[32],const ge25519_p3 *p) { fe25519 tx, ty, zi; fe25519_invert(&zi, &p->z); @@ -2283,7 +2288,7 @@ ZT_INLINE void ge25519_pack(unsigned char r[32],const ge25519_p3 *p) } /* computes [s1]p1 + [s2]p2 */ -void ge25519_double_scalarmult_vartime(ge25519_p3 *r, const ge25519_p3 *p1, const sc25519 *s1, const ge25519_p3 *p2, const sc25519 *s2) +ZT_INLINE void ge25519_double_scalarmult_vartime(ge25519_p3 *r, const ge25519_p3 *p1, const sc25519 *s1, const ge25519_p3 *p2, const sc25519 *s2) { ge25519_p1p1 tp1p1; ge25519_p3 pre[16]; @@ -2328,7 +2333,7 @@ void ge25519_double_scalarmult_vartime(ge25519_p3 *r, const ge25519_p3 *p1, cons } } -ZT_INLINE void ge25519_scalarmult_base(ge25519_p3 *r,const sc25519 *s) +void ge25519_scalarmult_base(ge25519_p3 *r,const sc25519 *s) { signed char b[85]; int i; @@ -2434,8 +2439,8 @@ bool C25519::verify(const uint8_t their[ZT_C25519_PUBLIC_KEY_LEN],const void *ms if ((siglen == 96)&&(!Utils::secureEq(sig+64,digest,32))) { return false; } else if (siglen == 64) { - memcpy(sigtmp,sig,64); - memcpy(sigtmp+64,digest,32); + Utils::copy<64>(sigtmp,sig); + Utils::copy<32>(sigtmp+64,digest); sig = sigtmp; } diff --git a/node/Capability.cpp b/node/Capability.cpp index f12abcee7..efea90df6 100644 --- a/node/Capability.cpp +++ b/node/Capability.cpp @@ -110,7 +110,7 @@ int Capability::unmarshal(const uint8_t *data,int len) noexcept _custody[i].signatureLength = sl; if ((sl > sizeof(_custody[i].signature))||((p + (int)sl) > len)) return -1; - memcpy(_custody[i].signature,data + p,sl); p += (int)sl; + Utils::copy(_custody[i].signature,data + p,sl); p += (int)sl; } if ((p + 2) > len) @@ -281,18 +281,18 @@ int Capability::unmarshalVirtualNetworkRules(const uint8_t *const data,const int case ZT_NETWORK_RULE_MATCH_MAC_SOURCE: case ZT_NETWORK_RULE_MATCH_MAC_DEST: if ((p + 6) > len) return -1; - memcpy(rules[ruleCount].v.mac,data + p,6); p += 6; + Utils::copy<6>(rules[ruleCount].v.mac,data + p); p += 6; break; case ZT_NETWORK_RULE_MATCH_IPV4_SOURCE: case ZT_NETWORK_RULE_MATCH_IPV4_DEST: if ((p + 5) > len) return -1; - memcpy(&(rules[ruleCount].v.ipv4.ip),data + p,4); p += 4; + Utils::copy<4>(&(rules[ruleCount].v.ipv4.ip),data + p); p += 4; rules[ruleCount].v.ipv4.mask = data[p++]; break; case ZT_NETWORK_RULE_MATCH_IPV6_SOURCE: case ZT_NETWORK_RULE_MATCH_IPV6_DEST: if ((p + 17) > len) return -1; - memcpy(rules[ruleCount].v.ipv6.ip,data + p,16); p += 16; + Utils::copy<16>(rules[ruleCount].v.ipv6.ip,data + p); p += 16; rules[ruleCount].v.ipv6.mask = data[p++]; break; case ZT_NETWORK_RULE_MATCH_IP_TOS: diff --git a/node/Capability.hpp b/node/Capability.hpp index 392fc30d0..276b96e50 100644 --- a/node/Capability.hpp +++ b/node/Capability.hpp @@ -82,7 +82,7 @@ public: _ruleCount((ruleCount < ZT_MAX_CAPABILITY_RULES) ? ruleCount : ZT_MAX_CAPABILITY_RULES) { if (_ruleCount > 0) - memcpy(_rules,rules,sizeof(ZT_VirtualNetworkRule) * _ruleCount); + Utils::copy(_rules,rules,sizeof(ZT_VirtualNetworkRule) * _ruleCount); } /** diff --git a/node/CertificateOfMembership.cpp b/node/CertificateOfMembership.cpp index 9dcc634f7..f31be8e01 100644 --- a/node/CertificateOfMembership.cpp +++ b/node/CertificateOfMembership.cpp @@ -57,7 +57,7 @@ bool CertificateOfMembership::agreesWith(const CertificateOfMembership &other) c } } - // them <> us + // them <> us (we need a second pass in case they have qualifiers we don't or vice versa) for(FCV<_Qualifier,ZT_CERTIFICATEOFMEMBERSHIP_MAX_ADDITIONAL_QUALIFIERS>::const_iterator i(other._additionalQualifiers.begin());i != other._additionalQualifiers.end();++i) { if (i->delta != 0xffffffffffffffffULL) { const uint64_t *v2 = nullptr; @@ -113,7 +113,7 @@ int CertificateOfMembership::marshal(uint8_t data[ZT_CERTIFICATEOFMEMBERSHIP_MAR if (v2) { // V2 marshal format will have three tuples followed by the fingerprint hash. Utils::storeBigEndian(data + 1,3); - memcpy(data + p,_issuedTo.hash(),48); + Utils::copy<48>(data + p,_issuedTo.hash()); p += 48; } else { // V1 marshal format must shove everything into tuples, resulting in nine. @@ -130,11 +130,11 @@ int CertificateOfMembership::marshal(uint8_t data[ZT_CERTIFICATEOFMEMBERSHIP_MAR if (v2) { // V2 marshal format prefixes signatures with a 16-bit length to support future signature types. Utils::storeBigEndian(data + p,(uint16_t)_signatureLength); p += 2; - memcpy(data + p,_signature,_signatureLength); + Utils::copy(data + p,_signature,_signatureLength); p += (int)_signatureLength; } else { // V1 only supports 96-byte signature fields. - memcpy(data + p,_signature,96); + Utils::copy<96>(data + p,_signature); p += 96; } @@ -204,19 +204,19 @@ int CertificateOfMembership::unmarshal(const uint8_t *data,int len) noexcept if ((p + 96) > len) return -1; _signatureLength = 96; - memcpy(_signature,data + p,96); + Utils::copy<96>(_signature,data + p); return p + 96; } else if (data[0] == 2) { if ((p + 48) > len) return -1; - memcpy(_issuedTo.apiFingerprint()->hash,data + p,48); + Utils::copy<48>(_issuedTo.apiFingerprint()->hash,data + p); p += 48; if ((p + 2) > len) return -1; _signatureLength = Utils::loadBigEndian(data + p); if ((_signatureLength > (unsigned int)sizeof(_signature))||((p + (int)_signatureLength) > len)) return -1; - memcpy(_signature,data + p,_signatureLength); + Utils::copy(_signature,data + p,_signatureLength); return p + (int)_signatureLength; } diff --git a/node/CertificateOfOwnership.cpp b/node/CertificateOfOwnership.cpp index d7e22f524..690ffa99b 100644 --- a/node/CertificateOfOwnership.cpp +++ b/node/CertificateOfOwnership.cpp @@ -17,21 +17,23 @@ namespace ZeroTier { void CertificateOfOwnership::addThing(const InetAddress &ip) { - if (_thingCount >= ZT_CERTIFICATEOFOWNERSHIP_MAX_THINGS) return; + if (_thingCount >= ZT_CERTIFICATEOFOWNERSHIP_MAX_THINGS) + return; if (ip.family() == AF_INET) { _thingTypes[_thingCount] = THING_IPV4_ADDRESS; - memcpy(_thingValues[_thingCount],&(reinterpret_cast(&ip)->sin_addr.s_addr),4); + Utils::copy<4>(_thingValues[_thingCount],&(reinterpret_cast(&ip)->sin_addr.s_addr)); ++_thingCount; } else if (ip.family() == AF_INET6) { _thingTypes[_thingCount] = THING_IPV6_ADDRESS; - memcpy(_thingValues[_thingCount],reinterpret_cast(&ip)->sin6_addr.s6_addr,16); + Utils::copy<16>(_thingValues[_thingCount],reinterpret_cast(&ip)->sin6_addr.s6_addr); ++_thingCount; } } void CertificateOfOwnership::addThing(const MAC &mac) { - if (_thingCount >= ZT_CERTIFICATEOFOWNERSHIP_MAX_THINGS) return; + if (_thingCount >= ZT_CERTIFICATEOFOWNERSHIP_MAX_THINGS) + return; _thingTypes[_thingCount] = THING_MAC_ADDRESS; mac.copyTo(_thingValues[_thingCount]); ++_thingCount; @@ -63,7 +65,7 @@ int CertificateOfOwnership::marshal(uint8_t data[ZT_CERTIFICATEOFOWNERSHIP_MARSH p += 30; for(unsigned int i=0,j=_thingCount;i(data + p,_thingValues[i]); p += ZT_CERTIFICATEOFOWNERSHIP_MAX_THING_VALUE_SIZE; } _issuedTo.copyTo(data + p); p += ZT_ADDRESS_LENGTH; @@ -71,7 +73,7 @@ int CertificateOfOwnership::marshal(uint8_t data[ZT_CERTIFICATEOFOWNERSHIP_MARSH if (!forSign) { data[p++] = 1; Utils::storeBigEndian(data + p,(uint16_t)_signatureLength); p += 2; - memcpy(data + p,_signature,_signatureLength); p += (int)_signatureLength; + Utils::copy(data + p,_signature,_signatureLength); p += (int)_signatureLength; } data[p++] = 0; data[p++] = 0; @@ -100,7 +102,7 @@ int CertificateOfOwnership::unmarshal(const uint8_t *data,int len) noexcept if ((p + 1 + ZT_CERTIFICATEOFOWNERSHIP_MAX_THING_VALUE_SIZE) > len) return -1; _thingTypes[i] = data[p++]; - memcpy(_thingValues[i],data + p,ZT_CERTIFICATEOFOWNERSHIP_MAX_THING_VALUE_SIZE); + Utils::copy(_thingValues[i],data + p); p += ZT_CERTIFICATEOFOWNERSHIP_MAX_THING_VALUE_SIZE; } diff --git a/node/CertificateOfOwnership.hpp b/node/CertificateOfOwnership.hpp index c9886a5f4..14c58d02d 100644 --- a/node/CertificateOfOwnership.hpp +++ b/node/CertificateOfOwnership.hpp @@ -64,7 +64,7 @@ public: ZT_INLINE CertificateOfOwnership(const uint64_t nwid,const int64_t ts,const Address &issuedTo,const uint32_t id) noexcept { - memset(reinterpret_cast(this),0,sizeof(CertificateOfOwnership)); + memoryZero(this); _networkId = nwid; _ts = ts; _id = id; diff --git a/node/Defragmenter.hpp b/node/Defragmenter.hpp index e2fee2b45..98e0ff1fa 100644 --- a/node/Defragmenter.hpp +++ b/node/Defragmenter.hpp @@ -263,7 +263,7 @@ public: e->via.zero(); } - // Slices are TriviallyCopyable and so may be memcpy'd from e->message to + // Slices are TriviallyCopyable and so may be raw copied from e->message to // the result parameter. This is fast. e->message.unsafeMoveTo(message); e->lastUsed = -1; // mark as "done" and force GC to collect diff --git a/node/Dictionary.cpp b/node/Dictionary.cpp index bf668535c..d168ad1e0 100644 --- a/node/Dictionary.cpp +++ b/node/Dictionary.cpp @@ -134,7 +134,7 @@ uint64_t Dictionary::getUI(const char *k,uint64_t dfl) const if (!e.empty()) { if (e.back() != 0) { const unsigned long sl = e.size(); - memcpy(tmp,e.data(),(sl > 17) ? 17 : sl); + Utils::copy(tmp,e.data(),(sl > 17) ? 17 : sl); tmp[17] = 0; return Utils::unhex((const char *)tmp); } diff --git a/node/Endpoint.cpp b/node/Endpoint.cpp index 40117a924..41d413e8e 100644 --- a/node/Endpoint.cpp +++ b/node/Endpoint.cpp @@ -82,7 +82,7 @@ int Endpoint::marshal(uint8_t data[ZT_ENDPOINT_MARSHAL_SIZE_MAX]) const noexcept data[9] = (uint8_t)(_v.zt.address >> 16U); data[10] = (uint8_t)(_v.zt.address >> 8U); data[11] = (uint8_t)_v.zt.address; - memcpy(data + 12,_v.zt.hash,ZT_IDENTITY_HASH_SIZE); + Utils::copy(data + 12,_v.zt.hash); return ZT_IDENTITY_HASH_SIZE + 12; case TYPE_DNSNAME: p = 7; @@ -147,7 +147,7 @@ int Endpoint::unmarshal(const uint8_t *restrict data,const int len) noexcept _v.zt.address |= ((uint64_t)data[9]) << 16U; _v.zt.address |= ((uint64_t)data[10]) << 8U; _v.zt.address |= (uint64_t)data[11]; - memcpy(_v.zt.hash,data + 12,ZT_IDENTITY_HASH_SIZE); + Utils::copy(_v.zt.hash,data + 12); return 60; case TYPE_DNSNAME: if (len < 10) diff --git a/node/Endpoint.hpp b/node/Endpoint.hpp index 73c2252b4..218e6ae53 100644 --- a/node/Endpoint.hpp +++ b/node/Endpoint.hpp @@ -76,7 +76,7 @@ public: _t(TYPE_ZEROTIER) { _v.zt.address = zt.toInt(); - memcpy(_v.zt.hash,identityHash,ZT_IDENTITY_HASH_SIZE); + Utils::copy(_v.zt.hash,identityHash); } explicit ZT_INLINE Endpoint(const char *name,const int port) noexcept : diff --git a/node/FCV.hpp b/node/FCV.hpp index 64a5254b3..f1693ab31 100644 --- a/node/FCV.hpp +++ b/node/FCV.hpp @@ -87,11 +87,6 @@ public: /** * This does a straight copy of one vector's data to another * - * If the other vector is larger than this one's capacity the data is - * silently truncated. This is unsafe in that it does not call any - * constructors or destructors and copies data with memcpy, so it can - * only be used with primitive types or TriviallyCopyable objects. - * * @tparam C2 Inferred capacity of other vector * @param v Other vector to copy to this one */ @@ -99,20 +94,17 @@ public: ZT_INLINE void unsafeAssign(const FCV &v) noexcept { _s = ((C2 > C)&&(v._s > C)) ? C : v._s; - memcpy(_m,v._m,_s * sizeof(T)); + Utils::copy(_m,v._m,_s * sizeof(T)); } /** * Move contents from this vector to another and clear this vector * - * This uses a straight memcpy and so is only safe for primitive types or - * types that are TriviallyCopyable. - * * @param v Target vector */ ZT_INLINE void unsafeMoveTo(FCV &v) noexcept { - memcpy(v._m,_m,(v._s = _s) * sizeof(T)); + Utils::copy(v._m,_m,(v._s = _s) * sizeof(T)); _s = 0; } diff --git a/node/Fingerprint.hpp b/node/Fingerprint.hpp index 676ffcbec..e713e8ec8 100644 --- a/node/Fingerprint.hpp +++ b/node/Fingerprint.hpp @@ -62,7 +62,7 @@ public: { uint8_t tmp[48 + 5]; address().copyTo(tmp); - memcpy(tmp + 5,_fp.hash,48); + Utils::copy<48>(tmp + 5,_fp.hash); Utils::b32e(tmp,sizeof(tmp),s,ZT_FINGERPRINT_STRING_BUFFER_LENGTH); s[ZT_FINGERPRINT_STRING_BUFFER_LENGTH-1] = 0; // sanity check, ensure always zero terminated } @@ -79,7 +79,7 @@ public: if (Utils::b32d(s,tmp,sizeof(tmp)) != sizeof(tmp)) return false; _fp.address = Address(tmp).toInt(); - memcpy(_fp.hash,tmp + 5,48); + Utils::copy<48>(_fp.hash,tmp + 5); return true; } diff --git a/node/Hashtable.hpp b/node/Hashtable.hpp index 4ef84e5bf..97733c1c8 100644 --- a/node/Hashtable.hpp +++ b/node/Hashtable.hpp @@ -15,6 +15,7 @@ #define ZT_HASHTABLE_HPP #include "Constants.hpp" +#include "Utils.hpp" #include #include @@ -95,14 +96,14 @@ public: /** * @param bc Initial capacity in buckets (default: 32, must be nonzero) */ - explicit ZT_INLINE Hashtable(unsigned long bc = 32) : + explicit ZT_INLINE Hashtable(unsigned int bc = 32) : _t(reinterpret_cast<_Bucket **>(::malloc(sizeof(_Bucket *) * bc))), _bc(bc), _s(0) { if (!_t) throw std::bad_alloc(); - memset(_t,0,sizeof(_Bucket *) * bc); + Utils::zero(_t,sizeof(uintptr_t) * bc); } ZT_INLINE Hashtable(const Hashtable &ht) : diff --git a/node/Identity.cpp b/node/Identity.cpp index 902e6a880..49cc042b8 100644 --- a/node/Identity.cpp +++ b/node/Identity.cpp @@ -40,7 +40,7 @@ void identityV0ProofOfWorkFrankenhash(const void *const publicKey,unsigned int p // Initialize genmem[] using Salsa20 in a CBC-like configuration since // ordinary Salsa20 is randomly seek-able. This is good for a cipher // but is not what we want for sequential memory-hardness. - memset(genmem,0,ZT_V0_IDENTITY_GEN_MEMORY); + Utils::zero(genmem); Salsa20 s20(digest,(char *)digest + 32); s20.crypt20((char *)genmem,(char *)genmem,64); for(unsigned long i=64;i(h); } unsigned int Identity::sign(const void *data,unsigned int len,void *sig,unsigned int siglen) const @@ -319,7 +319,7 @@ bool Identity::agree(const Identity &id,uint8_t key[ZT_PEER_SECRET_KEY_LENGTH]) // C25519 portion of a type 1 P-384 key. C25519::agree(_priv.c25519,id._pub.c25519,rawkey); SHA512(h,rawkey,ZT_C25519_SHARED_KEY_LEN); - memcpy(key,h,ZT_PEER_SECRET_KEY_LENGTH); + Utils::copy(key,h); return true; } @@ -334,13 +334,13 @@ bool Identity::agree(const Identity &id,uint8_t key[ZT_PEER_SECRET_KEY_LENGTH]) C25519::agree(_priv.c25519,id._pub.c25519,rawkey); ECC384ECDH(id._pub.p384,_priv.p384,rawkey + ZT_C25519_SHARED_KEY_LEN); SHA384(h,rawkey,ZT_C25519_SHARED_KEY_LEN + ZT_ECC384_SHARED_SECRET_SIZE); - memcpy(key,h,ZT_PEER_SECRET_KEY_LENGTH); + Utils::copy(key,h); return true; } else if (id._type == C25519) { // If the other identity is a C25519 identity we can agree using only that type. C25519::agree(_priv.c25519,id._pub.c25519,rawkey); SHA512(h,rawkey,ZT_C25519_SHARED_KEY_LEN); - memcpy(key,h,ZT_PEER_SECRET_KEY_LENGTH); + Utils::copy(key,h); return true; } @@ -502,10 +502,10 @@ int Identity::marshal(uint8_t data[ZT_IDENTITY_MARSHAL_SIZE_MAX],const bool incl switch(_type) { case C25519: data[ZT_ADDRESS_LENGTH] = (uint8_t)C25519; - memcpy(data + ZT_ADDRESS_LENGTH + 1,_pub.c25519,ZT_C25519_PUBLIC_KEY_LEN); + Utils::copy(data + ZT_ADDRESS_LENGTH + 1,_pub.c25519); if ((includePrivate)&&(_hasPrivate)) { data[ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN] = ZT_C25519_PRIVATE_KEY_LEN; - memcpy(data + ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1,_priv.c25519,ZT_C25519_PRIVATE_KEY_LEN); + Utils::copy(data + ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1,_priv.c25519); return ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1 + ZT_C25519_PRIVATE_KEY_LEN; } else { data[ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN] = 0; @@ -514,10 +514,10 @@ int Identity::marshal(uint8_t data[ZT_IDENTITY_MARSHAL_SIZE_MAX],const bool incl case P384: data[ZT_ADDRESS_LENGTH] = (uint8_t)P384; - memcpy(data + ZT_ADDRESS_LENGTH + 1,&_pub,ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE); + Utils::copy(data + ZT_ADDRESS_LENGTH + 1,&_pub); if ((includePrivate)&&(_hasPrivate)) { data[ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE] = ZT_IDENTITY_P384_COMPOUND_PRIVATE_KEY_SIZE; - memcpy(data + ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1,&_priv,ZT_IDENTITY_P384_COMPOUND_PRIVATE_KEY_SIZE); + Utils::copy(data + ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1,&_priv); return ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1 + ZT_IDENTITY_P384_COMPOUND_PRIVATE_KEY_SIZE; } else { data[ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE] = 0; @@ -544,7 +544,7 @@ int Identity::unmarshal(const uint8_t *data,const int len) noexcept if (len < (ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1)) return -1; - memcpy(_pub.c25519,data + ZT_ADDRESS_LENGTH + 1,ZT_C25519_PUBLIC_KEY_LEN); + Utils::copy(_pub.c25519,data + ZT_ADDRESS_LENGTH + 1); _computeHash(); privlen = data[ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN]; @@ -552,7 +552,7 @@ int Identity::unmarshal(const uint8_t *data,const int len) noexcept if (len < (ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1 + ZT_C25519_PRIVATE_KEY_LEN)) return -1; _hasPrivate = true; - memcpy(_priv.c25519,data + ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1,ZT_C25519_PRIVATE_KEY_LEN); + Utils::copy(_priv.c25519,data + ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1); return ZT_ADDRESS_LENGTH + 1 + ZT_C25519_PUBLIC_KEY_LEN + 1 + ZT_C25519_PRIVATE_KEY_LEN; } else if (privlen == 0) { _hasPrivate = false; @@ -564,7 +564,7 @@ int Identity::unmarshal(const uint8_t *data,const int len) noexcept if (len < (ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1)) return -1; - memcpy(&_pub,data + ZT_ADDRESS_LENGTH + 1,ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE); + Utils::copy(&_pub,data + ZT_ADDRESS_LENGTH + 1); _computeHash(); // this sets the address for P384 if (_address != Address(_fp.hash())) // this sanity check is possible with V1 identities return -1; @@ -574,7 +574,7 @@ int Identity::unmarshal(const uint8_t *data,const int len) noexcept if (len < (ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1 + ZT_IDENTITY_P384_COMPOUND_PRIVATE_KEY_SIZE)) return -1; _hasPrivate = true; - memcpy(&_priv,data + ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1,ZT_IDENTITY_P384_COMPOUND_PRIVATE_KEY_SIZE); + Utils::copy(&_priv,data + ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1); return ZT_ADDRESS_LENGTH + 1 + ZT_IDENTITY_P384_COMPOUND_PUBLIC_KEY_SIZE + 1 + ZT_IDENTITY_P384_COMPOUND_PRIVATE_KEY_SIZE; } else if (privlen == 0) { _hasPrivate = false; diff --git a/node/InetAddress.cpp b/node/InetAddress.cpp index 214c94316..451f4b29c 100644 --- a/node/InetAddress.cpp +++ b/node/InetAddress.cpp @@ -96,16 +96,16 @@ InetAddress::IpScope InetAddress::ipScope() const noexcept void InetAddress::set(const void *ipBytes,unsigned int ipLen,unsigned int port) noexcept { - memset(this,0,sizeof(InetAddress)); + memoryZero(this); if (ipLen == 4) { uint32_t ipb[1]; - memcpy(ipb,ipBytes,4); + Utils::copy<4>(ipb,ipBytes); _data.ss_family = AF_INET; reinterpret_cast(this)->sin_addr.s_addr = ipb[0]; reinterpret_cast(this)->sin_port = Utils::hton((uint16_t)port); } else if (ipLen == 16) { _data.ss_family = AF_INET6; - memcpy(reinterpret_cast(this)->sin6_addr.s6_addr,ipBytes,16); + Utils::copy<16>(reinterpret_cast(this)->sin6_addr.s6_addr,ipBytes); reinterpret_cast(this)->sin6_port = Utils::hton((uint16_t)port); } } @@ -164,7 +164,7 @@ bool InetAddress::fromString(const char *ipSlashPort) noexcept { char buf[64]; - memset(this,0,sizeof(InetAddress)); + memoryZero(this); if (!*ipSlashPort) return true; @@ -214,7 +214,7 @@ InetAddress InetAddress::netmask() const noexcept nm[0] = 0; nm[1] = 0; } - memcpy(reinterpret_cast(&r)->sin6_addr.s6_addr,nm,16); + Utils::copy<16>(reinterpret_cast(&r)->sin6_addr.s6_addr,nm); } break; } return r; @@ -240,10 +240,10 @@ InetAddress InetAddress::network() const noexcept case AF_INET6: { uint64_t nm[2]; const unsigned int bits = netmaskBits(); - memcpy(nm,reinterpret_cast(&r)->sin6_addr.s6_addr,16); + Utils::copy<16>(nm,reinterpret_cast(&r)->sin6_addr.s6_addr); nm[0] &= Utils::hton((uint64_t)((bits >= 64) ? 0xffffffffffffffffULL : (0xffffffffffffffffULL << (64 - bits)))); nm[1] &= Utils::hton((uint64_t)((bits <= 64) ? 0ULL : (0xffffffffffffffffULL << (128 - bits)))); - memcpy(reinterpret_cast(&r)->sin6_addr.s6_addr,nm,16); + Utils::copy<16>(reinterpret_cast(&r)->sin6_addr.s6_addr,nm); } break; } return r; @@ -324,7 +324,7 @@ void InetAddress::forTrace(ZT_TraceEventPathAddress &ta) const noexcept uint32_t tmp; switch(_data.ss_family) { default: - memset(&ta,0,sizeof(ZT_TraceEventPathAddress)); + Utils::zero(&ta); break; case AF_INET: ta.type = ZT_TRACE_EVENT_PATH_TYPE_INETADDR_V4; @@ -333,13 +333,13 @@ void InetAddress::forTrace(ZT_TraceEventPathAddress &ta) const noexcept ta.address[1] = reinterpret_cast(&tmp)[1]; ta.address[2] = reinterpret_cast(&tmp)[2]; ta.address[3] = reinterpret_cast(&tmp)[3]; - memset(ta.address + 4,0,sizeof(ta.address) - 4); + Utils::zero(ta.address + 4); ta.port = reinterpret_cast(this)->sin_port; break; case AF_INET6: ta.type = ZT_TRACE_EVENT_PATH_TYPE_INETADDR_V6; - memcpy(ta.address,reinterpret_cast(this)->sin6_addr.s6_addr,16); - memset(ta.address + 16,0,sizeof(ta.address) - 16); + Utils::copy<16>(ta.address,reinterpret_cast(this)->sin6_addr.s6_addr); + Utils::zero(ta.address + 16); ta.port = reinterpret_cast(this)->sin6_port; break; } @@ -415,7 +415,7 @@ int InetAddress::unmarshal(const uint8_t *restrict data,const int len) noexcept case 4: if (len < 7) return -1; - memset(reinterpret_cast(this),0,sizeof(InetAddress)); + memoryZero(this); reinterpret_cast(this)->sin_family = AF_INET; reinterpret_cast(&(reinterpret_cast(this)->sin_addr.s_addr))[0] = data[1]; reinterpret_cast(&(reinterpret_cast(this)->sin_addr.s_addr))[1] = data[2]; @@ -427,7 +427,7 @@ int InetAddress::unmarshal(const uint8_t *restrict data,const int len) noexcept case 6: if (len < 19) return -1; - memset(reinterpret_cast(this),0,sizeof(InetAddress)); + memoryZero(this); reinterpret_cast(this)->sin6_family = AF_INET6; for(int i=0;i<16;i++) (reinterpret_cast(this)->sin6_addr.s6_addr)[i] = data[i+1]; diff --git a/node/InetAddress.hpp b/node/InetAddress.hpp index 5c9313772..eed7bb158 100644 --- a/node/InetAddress.hpp +++ b/node/InetAddress.hpp @@ -43,9 +43,9 @@ private: template ZT_INLINE void copySockaddrToThis(const SA *sa) noexcept { - memcpy(reinterpret_cast(this),sa,sizeof(SA)); + Utils::copy(reinterpret_cast(this),sa); if (sizeof(SA) < sizeof(InetAddress)) - memset(reinterpret_cast(this) + sizeof(SA),0,sizeof(InetAddress) - sizeof(SA)); + Utils::zero(reinterpret_cast(this) + sizeof(SA)); } public: @@ -100,8 +100,11 @@ public: ZT_INLINE InetAddress(const uint32_t ipv4,unsigned int port) noexcept { this->set(&ipv4,4,port); } explicit ZT_INLINE InetAddress(const char *ipSlashPort) noexcept { this->fromString(ipSlashPort); } - ZT_INLINE void clear() noexcept { memoryZero(this); } - + ZT_INLINE InetAddress &operator=(const InetAddress &a) noexcept + { + memoryCopy(this,a); + return *this; + } ZT_INLINE InetAddress &operator=(const sockaddr_storage &ss) noexcept { memoryCopyUnsafe(this,&ss); @@ -123,7 +126,7 @@ public: { if (sa) copySockaddrToThis(sa); - else memset(reinterpret_cast(this),0,sizeof(InetAddress)); + else memoryZero(this); return *this; } ZT_INLINE InetAddress &operator=(const sockaddr_in6 &sa) noexcept @@ -135,7 +138,7 @@ public: { if (sa) copySockaddrToThis(sa); - else memset(reinterpret_cast(this),0,sizeof(InetAddress)); + else memoryZero(this); return *this; } ZT_INLINE InetAddress &operator=(const sockaddr &sa) noexcept @@ -144,7 +147,7 @@ public: copySockaddrToThis(reinterpret_cast(&sa)); else if (sa.sa_family == AF_INET6) copySockaddrToThis(reinterpret_cast(&sa)); - else memset(reinterpret_cast(this),0,sizeof(InetAddress)); + else memoryZero(this); return *this; } ZT_INLINE InetAddress &operator=(const sockaddr *sa) noexcept @@ -161,6 +164,8 @@ public: return *this; } + ZT_INLINE void clear() noexcept { memoryZero(this); } + /** * @return Address family (ss_family in sockaddr_storage) */ @@ -339,7 +344,7 @@ public: break; case AF_INET6: reinterpret_cast(&r)->sin6_family = AF_INET; - memcpy(reinterpret_cast(&r)->sin6_addr.s6_addr,reinterpret_cast(this)->sin6_addr.s6_addr,16); + Utils::copy<16>(reinterpret_cast(&r)->sin6_addr.s6_addr,reinterpret_cast(this)->sin6_addr.s6_addr); break; } return r; diff --git a/node/LZ4.cpp b/node/LZ4.cpp index ccff0bab4..bfdaae97d 100644 --- a/node/LZ4.cpp +++ b/node/LZ4.cpp @@ -37,6 +37,7 @@ // original LZ4 license. #include "LZ4.hpp" +#include "Utils.hpp" #include #include @@ -113,7 +114,6 @@ union LZ4_streamDecode_u { #define ALLOCATOR(n,s) calloc(n,s) #define FREEMEM free -#define MEM_INIT memset typedef uint8_t BYTE; typedef uint16_t U16; @@ -142,26 +142,26 @@ FORCE_INLINE U32 LZ4_read32(const void* ptr) { return ((const unalign*)ptr)->u32 FORCE_INLINE reg_t LZ4_read_ARCH(const void* ptr) { return ((const unalign*)ptr)->uArch; } FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; } FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; } -#else /* safe and portable access through memcpy() */ +#else /* safe and portable */ FORCE_INLINE U16 LZ4_read16(const void* memPtr) { - U16 val; memcpy(&val, memPtr, sizeof(val)); return val; + U16 val; Utils::copy(&val, memPtr, sizeof(val)); return val; } FORCE_INLINE U32 LZ4_read32(const void* memPtr) { - U32 val; memcpy(&val, memPtr, sizeof(val)); return val; + U32 val; Utils::copy(&val, memPtr, sizeof(val)); return val; } FORCE_INLINE reg_t LZ4_read_ARCH(const void* memPtr) { - reg_t val; memcpy(&val, memPtr, sizeof(val)); return val; + reg_t val; Utils::copy(&val, memPtr, sizeof(val)); return val; } FORCE_INLINE void LZ4_write16(void* memPtr, U16 value) { - memcpy(memPtr, &value, sizeof(value)); + Utils::copy(memPtr, &value, sizeof(value)); } FORCE_INLINE void LZ4_write32(void* memPtr, U32 value) { - memcpy(memPtr, &value, sizeof(value)); + Utils::copy(memPtr, &value, sizeof(value)); } #endif /* LZ4_FORCE_MEMORY_ACCESS */ @@ -188,7 +188,7 @@ FORCE_INLINE void LZ4_writeLE16(void* memPtr, U16 value) FORCE_INLINE void LZ4_copy8(void* dst, const void* src) { - memcpy(dst,src,8); + Utils::copy<8>(dst,src); } FORCE_INLINE void LZ4_wildCopy(void* dstPtr, const void* srcPtr, void* dstEnd) @@ -555,7 +555,7 @@ FORCE_INLINE int LZ4_compress_generic( } else { *op++ = (BYTE)(lastRun<(LZ4_stream); } FORCE_INLINE int LZ4_decompress_generic( @@ -655,7 +655,7 @@ FORCE_INLINE int LZ4_decompress_generic( if ((!endOnInput) && (cpy != oend)) goto _output_error; /* Error : block decoding must stop exactly there */ if ((endOnInput) && ((ip+length != iend) || (cpy > oend))) goto _output_error; /* Error : input must be consumed */ } - memcpy(op, ip, length); + Utils::copy(op, ip, length); ip += length; op += length; break; /* Necessarily EOF, due to parsing restrictions */ @@ -694,14 +694,14 @@ FORCE_INLINE int LZ4_decompress_generic( /* match encompass external dictionary and current block */ size_t const copySize = (size_t)(lowPrefix-match); size_t const restSize = length - copySize; - memcpy(op, dictEnd - copySize, copySize); + Utils::copy(op, dictEnd - copySize, copySize); op += copySize; if (restSize > (size_t)(op-lowPrefix)) { /* overlap copy */ BYTE* const endOfMatch = op + restSize; const BYTE* copyFrom = lowPrefix; while (op < endOfMatch) *op++ = *copyFrom++; } else { - memcpy(op, lowPrefix, restSize); + Utils::copy(op, lowPrefix, restSize); op += restSize; } } continue; @@ -716,7 +716,7 @@ FORCE_INLINE int LZ4_decompress_generic( op[2] = match[2]; op[3] = match[3]; match += dec32table[offset]; - memcpy(op+4, match, 4); + Utils::copy<4>(op+4, match); match -= dec64; } else { LZ4_copy8(op, match); match+=8; } op += 8; diff --git a/node/Locator.cpp b/node/Locator.cpp index 76a9cc0ca..329592eec 100644 --- a/node/Locator.cpp +++ b/node/Locator.cpp @@ -59,7 +59,7 @@ int Locator::marshal(uint8_t data[ZT_LOCATOR_MARSHAL_SIZE_MAX],const bool exclud if (!excludeSignature) { Utils::storeBigEndian(data + p,(uint16_t)_signatureLength); p += 2; - memcpy(data + p,_signature,_signatureLength); + Utils::copy(data + p,_signature,_signatureLength); p += (int)_signatureLength; } @@ -102,7 +102,7 @@ int Locator::unmarshal(const uint8_t *restrict data,const int len) noexcept _signatureLength = sl; if ((p + (int)sl) > len) return -1; - memcpy(_signature,data + p,sl); + Utils::copy(_signature,data + p,sl); p += (int)sl; if ((p + 2) > len) diff --git a/node/Network.cpp b/node/Network.cpp index 620eedce1..a0f3067b3 100644 --- a/node/Network.cpp +++ b/node/Network.cpp @@ -1458,20 +1458,20 @@ void Network::_externalConfig(ZT_VirtualNetworkConfig *ec) const ec->assignedAddressCount = 0; for(unsigned int i=0;iassignedAddresses[i]),&(_config.staticIps[i]),sizeof(struct sockaddr_storage)); + Utils::copy(&(ec->assignedAddresses[i]),&(_config.staticIps[i])); ++ec->assignedAddressCount; } else { - memset(&(ec->assignedAddresses[i]),0,sizeof(struct sockaddr_storage)); + Utils::zero(&(ec->assignedAddresses[i])); } } ec->routeCount = 0; for(unsigned int i=0;iroutes[i]),&(_config.routes[i]),sizeof(ZT_VirtualNetworkRoute)); + Utils::copy(&(ec->routes[i]),&(_config.routes[i])); ++ec->routeCount; } else { - memset(&(ec->routes[i]),0,sizeof(ZT_VirtualNetworkRoute)); + Utils::zero(&(ec->routes[i])); } } } diff --git a/node/NetworkConfig.cpp b/node/NetworkConfig.cpp index fbb5833e9..91c40af2e 100644 --- a/node/NetworkConfig.cpp +++ b/node/NetworkConfig.cpp @@ -123,9 +123,9 @@ bool NetworkConfig::fromDictionary(const Dictionary &d) this->issuedTo = d.getUI(ZT_NETWORKCONFIG_DICT_KEY_ISSUED_TO,0); const std::vector *blob = &(d[ZT_NETWORKCONFIG_DICT_KEY_ISSUED_TO_IDENTITY_HASH]); if (blob->size() == ZT_IDENTITY_HASH_SIZE) { - memcpy(this->issuedToFingerprintHash,blob->data(),ZT_IDENTITY_HASH_SIZE); + Utils::copy(this->issuedToFingerprintHash,blob->data()); } else { - memset(this->issuedToFingerprintHash,0,ZT_IDENTITY_HASH_SIZE); + Utils::zero(this->issuedToFingerprintHash); } if (!this->issuedTo) return false; diff --git a/node/NetworkConfig.hpp b/node/NetworkConfig.hpp index 8434452c7..42c6a8288 100644 --- a/node/NetworkConfig.hpp +++ b/node/NetworkConfig.hpp @@ -157,9 +157,6 @@ namespace ZeroTier { /** * Network configuration received from network controller nodes - * - * This is a memcpy()'able structure and is safe (in a crash sense) to modify - * without locks. */ struct NetworkConfig : TriviallyCopyable { diff --git a/node/Node.cpp b/node/Node.cpp index 649861570..a08b1a8c2 100644 --- a/node/Node.cpp +++ b/node/Node.cpp @@ -493,7 +493,7 @@ ZT_PeerList *Node::peers() const identities[pl->peerCount] = (*pi)->identity(); // need to make a copy in case peer gets deleted p->identity = &identities[pl->peerCount]; p->fingerprint.address = p->address; - memcpy(p->fingerprint.hash,(*pi)->identity().fingerprint().hash(),ZT_IDENTITY_HASH_SIZE); + Utils::copy(p->fingerprint.hash,(*pi)->identity().fingerprint().hash()); if ((*pi)->remoteVersionKnown()) { p->versionMajor = (int)(*pi)->remoteVersionMajor(); p->versionMinor = (int)(*pi)->remoteVersionMinor(); @@ -507,13 +507,13 @@ ZT_PeerList *Node::peers() const if (p->latency >= 0xffff) p->latency = -1; p->root = RR->topology->isRoot((*pi)->identity()) ? 1 : 0; - memcpy(&p->bootstrap,&((*pi)->bootstrap()),sizeof(sockaddr_storage)); + Utils::copy(&p->bootstrap,&((*pi)->bootstrap())); std::vector< SharedPtr > paths; (*pi)->getAllPaths(paths); p->pathCount = 0; for(std::vector< SharedPtr >::iterator path(paths.begin());path!=paths.end();++path) { - memcpy(&(p->paths[p->pathCount].address),&((*path)->address()),sizeof(struct sockaddr_storage)); + Utils::copy(&(p->paths[p->pathCount].address),&((*path)->address())); p->paths[p->pathCount].lastSend = (*path)->lastOut(); p->paths[p->pathCount].lastReceive = (*path)->lastIn(); p->paths[p->pathCount].trustedPathId = RR->topology->getOutboundPathTrust((*path)->address()); diff --git a/node/Protocol.cpp b/node/Protocol.cpp index ba2f9feb1..51f11943f 100644 --- a/node/Protocol.cpp +++ b/node/Protocol.cpp @@ -33,8 +33,8 @@ std::atomic _s_packetIdCtr((uint64_t)time(nullptr) << 32U); uint64_t createProbe(const Identity &sender,const Identity &recipient,const uint8_t key[ZT_PEER_SECRET_KEY_LENGTH]) noexcept { uint8_t tmp[ZT_IDENTITY_HASH_SIZE + ZT_IDENTITY_HASH_SIZE]; - memcpy(tmp,sender.fingerprint().hash(),ZT_IDENTITY_HASH_SIZE); - memcpy(tmp + ZT_IDENTITY_HASH_SIZE,recipient.fingerprint().hash(),ZT_IDENTITY_HASH_SIZE); + Utils::copy(tmp,sender.fingerprint().hash()); + Utils::copy(tmp + ZT_IDENTITY_HASH_SIZE,recipient.fingerprint().hash()); uint64_t hash[6]; SHA384(hash,tmp,sizeof(tmp),key,ZT_PEER_SECRET_KEY_LENGTH); return hash[0]; @@ -93,7 +93,7 @@ int compress(SharedPtr &pkt,int packetSize) noexcept const int uncompressedLen = packetSize - ZT_PROTO_PACKET_PAYLOAD_START; const int compressedLen = LZ4_compress_fast(reinterpret_cast(pkt->unsafeData + ZT_PROTO_PACKET_PAYLOAD_START),reinterpret_cast(pkt2->unsafeData + ZT_PROTO_PACKET_PAYLOAD_START),uncompressedLen,ZT_BUF_MEM_SIZE - ZT_PROTO_PACKET_PAYLOAD_START); if ((compressedLen > 0)&&(compressedLen < uncompressedLen)) { - memcpy(pkt2->unsafeData,pkt->unsafeData,ZT_PROTO_PACKET_PAYLOAD_START); + Utils::copy(pkt2->unsafeData,pkt->unsafeData); pkt.swap(pkt2); pkt->as().verb |= ZT_PROTO_VERB_FLAG_COMPRESSED; return compressedLen + ZT_PROTO_PACKET_PAYLOAD_START; diff --git a/node/Revocation.cpp b/node/Revocation.cpp index 0f4a9af01..4e6bb009b 100644 --- a/node/Revocation.cpp +++ b/node/Revocation.cpp @@ -46,7 +46,7 @@ int Revocation::marshal(uint8_t data[ZT_REVOCATION_MARSHAL_SIZE_MAX],bool forSig if (!forSign) { data[p++] = 1; Utils::storeBigEndian(data + p,(uint16_t)_signatureLength); - memcpy(data + p,_signature,_signatureLength); + Utils::copy(data + p,_signature,_signatureLength); p += (int)_signatureLength; } data[p++] = 0; @@ -77,7 +77,7 @@ int Revocation::unmarshal(const uint8_t *restrict data,const int len) noexcept int p = 54 + (int)_signatureLength; if ((_signatureLength > ZT_SIGNATURE_BUFFER_SIZE)||(p > len)) return -1; - memcpy(_signature,data + 54,_signatureLength); + Utils::copy(_signature,data + 54,_signatureLength); if ((p + 2) > len) return -1; p += 2 + Utils::loadBigEndian(data + p); diff --git a/node/SHA512.cpp b/node/SHA512.cpp index 17549e95d..6e89cec28 100644 --- a/node/SHA512.cpp +++ b/node/SHA512.cpp @@ -265,7 +265,7 @@ void KBKDFHMACSHA384(const uint8_t key[32],const char label,const char context,c kbkdfMsg[11] = 1; kbkdfMsg[12] = 0; // key length: 256 bits as big-endian 32-bit value HMACSHA384(key,&kbkdfMsg,sizeof(kbkdfMsg),kbuf); - memcpy(out,kbuf,32); + Utils::copy<32>(out,kbuf); } } // namespace ZeroTier diff --git a/node/SharedPtr.hpp b/node/SharedPtr.hpp index 15c7d3647..358b3dd83 100644 --- a/node/SharedPtr.hpp +++ b/node/SharedPtr.hpp @@ -25,10 +25,6 @@ namespace ZeroTier { * This is an introspective shared pointer. Classes that need to be reference * counted must list this as a 'friend' and must have a private instance of * atomic called __refCount. - * - * This is technically TriviallyCopyable but extreme care must be taken if - * one wishes to handle it in this manner. A memcpy must be followed by a - * memset of the source to 0 so as to achieve 'move' semantics. */ template class SharedPtr : public TriviallyCopyable diff --git a/node/Tag.cpp b/node/Tag.cpp index 6d649f81c..69c038d7b 100644 --- a/node/Tag.cpp +++ b/node/Tag.cpp @@ -42,7 +42,7 @@ int Tag::marshal(uint8_t data[ZT_TAG_MARSHAL_SIZE_MAX],bool forSign) const noexc if (!forSign) { data[p++] = 1; Utils::storeBigEndian(data + p,(uint16_t)_signatureLength); p += 2; - memcpy(data + p,_signature,_signatureLength); + Utils::copy(data + p,_signature,_signatureLength); p += (int)_signatureLength; } data[p++] = 0; @@ -69,7 +69,7 @@ int Tag::unmarshal(const uint8_t *data,int len) noexcept int p = 37 + (int)_signatureLength; if ((_signatureLength > ZT_SIGNATURE_BUFFER_SIZE)||(p > len)) return -1; - memcpy(_signature,data + p,_signatureLength); + Utils::copy(_signature,data + p,_signatureLength); if ((p + 2) > len) return -1; p += 2 + Utils::loadBigEndian(data + p); diff --git a/node/Tests.cpp b/node/Tests.cpp index e1c72a1e3..6fbe278d3 100644 --- a/node/Tests.cpp +++ b/node/Tests.cpp @@ -338,19 +338,19 @@ extern "C" const char *ZTT_general() ZT_T_PRINTF("FAILED (loadAsIsEndian)" ZT_EOL_S); return "Utils::loadAsIsEndian() broken"; } - memset(t,0,sizeof(t)); + Utils::zero(t); Utils::storeAsIsEndian(t,0x0807060504030201ULL); if (t[0] != 1) { ZT_T_PRINTF("FAILED (storeAsIsEndian)" ZT_EOL_S); return "Utils::storeAsIsEndian() broken"; } - memset(t,0,sizeof(t)); + Utils::zero(t); Utils::storeAsIsEndian(t,0x04030201); if (t[0] != 1) { ZT_T_PRINTF("FAILED (storeAsIsEndian)" ZT_EOL_S); return "Utils::storeAsIsEndian() broken"; } - memset(t,0,sizeof(t)); + Utils::zero(t); Utils::storeAsIsEndian(t,0x0201); if (t[0] != 1) { ZT_T_PRINTF("FAILED (storeAsIsEndian)" ZT_EOL_S); @@ -369,19 +369,19 @@ extern "C" const char *ZTT_general() ZT_T_PRINTF("FAILED (loadAsIsEndian)" ZT_EOL_S); return "Utils::loadAsIsEndian() broken"; } - memset(t,0,sizeof(t)); + Utils::zero(t); Utils::storeAsIsEndian(t,0x0807060504030201ULL); if (t[0] != 8) { ZT_T_PRINTF("FAILED (storeAsIsEndian)" ZT_EOL_S); return "Utils::storeAsIsEndian() broken"; } - memset(t,0,sizeof(t)); + Utils::zero(t); Utils::storeAsIsEndian(t,0x04030201); if (t[0] != 4) { ZT_T_PRINTF("FAILED (storeAsIsEndian)" ZT_EOL_S); return "Utils::storeAsIsEndian() broken"; } - memset(t,0,sizeof(t)); + Utils::zero(t); Utils::storeAsIsEndian(t,0x0201); if (t[0] != 2) { ZT_T_PRINTF("FAILED (storeAsIsEndian)" ZT_EOL_S); @@ -831,14 +831,14 @@ extern "C" const char *ZTT_crypto() ZT_T_PRINTF("[crypto] Testing Salsa20... "); Salsa20 s20; s20.init(SALSA20_TV0_KEY,SALSA20_TV0_IV); - memset(ks,0,sizeof(ks)); + Utils::zero(ks); s20.crypt20(ks,ks,sizeof(ks)); if (memcmp(ks,SALSA20_TV0_KS,64) != 0) { ZT_T_PRINTF("FAILED (Salsa20 test vector)" ZT_EOL_S); return "Salsa20 test vector failed"; } s20.init(SALSA12_TV0_KEY,SALSA12_TV0_IV); - memset(ks,0,sizeof(ks)); + Utils::zero(ks); s20.crypt12(ks,ks,sizeof(ks)); if (memcmp(ks,SALSA12_TV0_KS,64) != 0) { ZT_T_PRINTF("FAILED (Salsa12 test vector)" ZT_EOL_S); @@ -994,8 +994,8 @@ extern "C" const char *ZTT_benchmarkCrypto() { try { uint8_t tmp[16384],tag[16]; - memset(tmp,0,sizeof(tmp)); - memset(tag,0,sizeof(tag)); + Utils::zero(tmp); + Utils::zero(tag); { ZT_T_PRINTF("[crypto] Benchmarking SHA384... "); @@ -1094,7 +1094,7 @@ extern "C" const char *ZTT_benchmarkCrypto() { uint8_t sig[ZT_C25519_SIGNATURE_LEN]; - memset(sig,0,sizeof(sig)); + Utils::zero(sig); ZT_T_PRINTF("[crypto] Benchmarking Ed25519 signature... "); int64_t start = now(); for(int i=0;i<150;++i) { @@ -1169,7 +1169,7 @@ extern "C" const char *ZTT_benchmarkCrypto() for(long i=0;i<10;++i) foo = (uint8_t)id.locallyValidate(); end = now(); - ZT_T_PRINTF("%.4f ms/validation" ZT_EOL_S,(double)(end - start) / 10.0); + ZT_T_PRINTF(" %.4f ms/validation" ZT_EOL_S,(double)(end - start) / 10.0); ZT_T_PRINTF("[crypto] Benchmarking V1 Identity generation..."); start = now(); for(long i=0;i<10;++i) { diff --git a/node/Trace.cpp b/node/Trace.cpp index 9430ad45d..67da4c515 100644 --- a/node/Trace.cpp +++ b/node/Trace.cpp @@ -74,7 +74,7 @@ void Trace::unexpectedError( ev.evSize = ZT_CONST_TO_BE_UINT16(sizeof(ev)); ev.evType = ZT_CONST_TO_BE_UINT16(ZT_TRACE_UNEXPECTED_ERROR); ev.codeLocation = codeLocation; - memset(ev.message,0,sizeof(ev.message)); + Utils::zero(ev.message); va_start(ap,message); vsnprintf(ev.message,sizeof(ev.message),message,ap); va_end(ap); @@ -116,12 +116,12 @@ void Trace::_tryingNewPath( ev.evSize = ZT_CONST_TO_BE_UINT16(sizeof(ev)); ev.evType = ZT_CONST_TO_BE_UINT16(ZT_TRACE_VL1_TRYING_NEW_PATH); ev.codeLocation = Utils::hton(codeLocation); - memcpy(&ev.peer,trying.fingerprint().apiFingerprint(),sizeof(ev.peer)); + Utils::copy(&ev.peer,trying.fingerprint().apiFingerprint()); physicalAddress.forTrace(ev.physicalAddress); triggerAddress.forTrace(ev.triggerAddress); ev.triggeringPacketId = triggeringPacketId; ev.triggeringPacketVerb = triggeringPacketVerb; - memcpy(&ev.triggeringPeer,triggeringPeer.fingerprint().apiFingerprint(),sizeof(ev.triggeringPeer)); + Utils::copy(&ev.triggeringPeer,triggeringPeer.fingerprint().apiFingerprint()); ev.reason = (uint8_t)reason; RR->node->postEvent(tPtr,ZT_EVENT_TRACE,&ev); } @@ -139,7 +139,7 @@ void Trace::_learnedNewPath( ev.evType = ZT_CONST_TO_BE_UINT16(ZT_TRACE_VL1_LEARNED_NEW_PATH); ev.codeLocation = Utils::hton(codeLocation); ev.packetId = packetId; // packet IDs are kept in big-endian - memcpy(&ev.peer,peerIdentity.fingerprint().apiFingerprint(),sizeof(ev.peer)); + Utils::copy(&ev.peer,peerIdentity.fingerprint().apiFingerprint()); physicalAddress.forTrace(ev.physicalAddress); replaced.forTrace(ev.replaced); @@ -163,7 +163,7 @@ void Trace::_incomingPacketDropped( ev.codeLocation = Utils::hton(codeLocation); ev.packetId = packetId; // packet IDs are kept in big-endian ev.networkId = Utils::hton(networkId); - memcpy(&ev.peer,peerIdentity.fingerprint().apiFingerprint(),sizeof(ev.peer)); + Utils::copy(&ev.peer,peerIdentity.fingerprint().apiFingerprint()); physicalAddress.forTrace(ev.physicalAddress); ev.hops = hops; ev.verb = verb; @@ -196,8 +196,8 @@ void Trace::_outgoingNetworkFrameDropped( unsigned int l = frameLength; if (l > sizeof(ev.frameHead)) l = sizeof(ev.frameHead); - memcpy(ev.frameHead,frameData,l); - memset(ev.frameHead + l,0,sizeof(ev.frameHead) - l); + Utils::copy(ev.frameHead,frameData,l); + Utils::copy(ev.frameHead + l,0,sizeof(ev.frameHead) - l); } ev.reason = (uint8_t)reason; @@ -226,7 +226,7 @@ void Trace::_incomingNetworkFrameDropped( ev.networkId = Utils::hton(networkId); ev.sourceMac = Utils::hton(sourceMac.toInt()); ev.destMac = Utils::hton(destMac.toInt()); - memcpy(&ev.sender,peerIdentity.fingerprint().apiFingerprint(),sizeof(ev.sender)); + Utils::copy(&ev.sender,peerIdentity.fingerprint().apiFingerprint()); physicalAddress.forTrace(ev.physicalAddress); ev.hops = hops; ev.frameLength = Utils::hton(frameLength); @@ -234,8 +234,8 @@ void Trace::_incomingNetworkFrameDropped( unsigned int l = frameLength; if (l > sizeof(ev.frameHead)) l = sizeof(ev.frameHead); - memcpy(ev.frameHead,frameData,l); - memset(ev.frameHead + l,0,sizeof(ev.frameHead) - l); + Utils::copy(ev.frameHead,frameData,l); + Utils::copy(ev.frameHead + l,0,sizeof(ev.frameHead) - l); } ev.verb = verb; ev.credentialRequestSent = (uint8_t)credentialRequestSent; @@ -282,10 +282,10 @@ void Trace::_networkFilter( ev.evType = ZT_CONST_TO_BE_UINT16(ZT_TRACE_VL2_NETWORK_FILTER); ev.codeLocation = Utils::hton(codeLocation); ev.networkId = Utils::hton(networkId); - memcpy(ev.primaryRuleSetLog,primaryRuleSetLog,sizeof(ev.primaryRuleSetLog)); + Utils::copy(ev.primaryRuleSetLog,primaryRuleSetLog); if (matchingCapabilityRuleSetLog) - memcpy(ev.matchingCapabilityRuleSetLog,matchingCapabilityRuleSetLog,sizeof(ev.matchingCapabilityRuleSetLog)); - else memset(ev.matchingCapabilityRuleSetLog,0,sizeof(ev.matchingCapabilityRuleSetLog)); + Utils::copy(ev.matchingCapabilityRuleSetLog,matchingCapabilityRuleSetLog); + else Utils::zero(ev.matchingCapabilityRuleSetLog); ev.matchingCapabilityId = Utils::hton(matchingCapabilityId); ev.matchingCapabilityTimestamp = Utils::hton(matchingCapabilityTimestamp); ev.source = Utils::hton(source.toInt()); @@ -297,8 +297,8 @@ void Trace::_networkFilter( unsigned int l = frameLength; if (l > sizeof(ev.frameHead)) l = sizeof(ev.frameHead); - memcpy(ev.frameHead,frameData,l); - memset(ev.frameHead + l,0,sizeof(ev.frameHead) - l); + Utils::copy(ev.frameHead,frameData,l); + Utils::copy(ev.frameHead + l,0,sizeof(ev.frameHead) - l); } ev.etherType = Utils::hton(etherType); ev.vlanId = Utils::hton(vlanId); @@ -325,10 +325,10 @@ void Trace::_credentialRejected( ev.codeLocation = Utils::hton(codeLocation); ev.networkId = Utils::hton(networkId); if (identity) { - memcpy(&ev.peer,identity.fingerprint().apiFingerprint(),sizeof(ev.peer)); + Utils::copy(&ev.peer,identity.fingerprint().apiFingerprint()); } else { ev.peer.address = address.toInt(); - memset(ev.peer.hash,0,sizeof(ev.peer.hash)); + Utils::zero(ev.peer.hash); } ev.credentialId = Utils::hton(credentialId); ev.credentialTimestamp = Utils::hton(credentialTimestamp); diff --git a/node/Trace.hpp b/node/Trace.hpp index 26f635243..fa05b8d69 100644 --- a/node/Trace.hpp +++ b/node/Trace.hpp @@ -70,7 +70,7 @@ public: } ZT_INLINE void clear() { - memset(l,0,sizeof(l)); + Utils::zero(l); } }; @@ -82,7 +82,7 @@ public: template struct Str { - ZT_INLINE Str() { memset(s,0,sizeof(s)); } + ZT_INLINE Str() { Utils::zero(s); } constexpr static unsigned int capacity() { return C; } char s[C]; }; diff --git a/node/TriviallyCopyable.hpp b/node/TriviallyCopyable.hpp index 992b7eba2..676731e55 100644 --- a/node/TriviallyCopyable.hpp +++ b/node/TriviallyCopyable.hpp @@ -23,7 +23,7 @@ namespace ZeroTier { /** - * Classes inheriting from this base class are safe to abuse in C-like ways: memcpy, memset, etc. + * Classes inheriting from this base class are safe to abuse in C-like ways. * * It also includes some static methods to do this conveniently. */ @@ -65,7 +65,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryZero(T *obj) noexcept { TriviallyCopyable *const tmp = obj; - memset(tmp,0,sizeof(T)); + Utils::zero(tmp); } /** @@ -78,7 +78,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryZero(T &obj) noexcept { TriviallyCopyable *const tmp = &obj; - memset(tmp,0,sizeof(T)); + Utils::zero(tmp); } /** @@ -92,7 +92,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryCopyUnsafe(T *dest,const void *src) noexcept { TriviallyCopyable *const tmp = dest; - memcpy(tmp,src,sizeof(T)); + Utils::copy(tmp,src); } /** @@ -106,7 +106,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryCopyUnsafe(T &dest,const void *src) noexcept { TriviallyCopyable *const tmp = &dest; - memcpy(tmp,src,sizeof(T)); + Utils::copy(tmp,src); } /** @@ -120,7 +120,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryCopy(T *dest,const T *src) noexcept { TriviallyCopyable *const tmp = dest; - memcpy(tmp,src,sizeof(T)); + Utils::copy(tmp,src); } /** @@ -134,7 +134,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryCopy(T *dest,const T &src) noexcept { TriviallyCopyable *const tmp = dest; - memcpy(tmp,&src,sizeof(T)); + Utils::copy(tmp,&src); } /** @@ -148,7 +148,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryCopy(T &dest,const T *src) noexcept { TriviallyCopyable *const tmp = &dest; - memcpy(tmp,src,sizeof(T)); + Utils::copy(tmp,src); } /** @@ -162,7 +162,7 @@ ZT_PACKED_STRUCT(struct TriviallyCopyable static ZT_INLINE void memoryCopy(T &dest,const T &src) noexcept { TriviallyCopyable *const tmp = &dest; - memcpy(tmp,&src,sizeof(T)); + Utils::copy(tmp,&src); } }); diff --git a/node/Utils.hpp b/node/Utils.hpp index 6b6217a71..09a942f16 100644 --- a/node/Utils.hpp +++ b/node/Utils.hpp @@ -16,6 +16,12 @@ #include "Constants.hpp" +#ifdef ZT_ARCH_X64 +#include +#include +#include +#endif + namespace ZeroTier { namespace Utils { @@ -577,6 +583,260 @@ static ZT_INLINE void storeLittleEndian(void *const p,const I i) noexcept #endif } +template +static ZT_INLINE void copy(void *dest,const void *src) noexcept; +template<> +ZT_INLINE void copy<64>(void *const dest,const void *const src) noexcept +{ +#ifdef ZT_ARCH_X64 + __m128i a = _mm_loadu_si128(reinterpret_cast(src)); + __m128i b = _mm_loadu_si128(reinterpret_cast(src) + 1); + __m128i c = _mm_loadu_si128(reinterpret_cast(src) + 2); + __m128i d = _mm_loadu_si128(reinterpret_cast(src) + 3); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest),a); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,b); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 2,c); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 3,d); +#else + uint64_t a = reinterpret_cast(src)[0]; + uint64_t b = reinterpret_cast(src)[1]; + uint64_t c = reinterpret_cast(src)[2]; + uint64_t d = reinterpret_cast(src)[3]; + uint64_t e = reinterpret_cast(src)[4]; + uint64_t f = reinterpret_cast(src)[5]; + uint64_t g = reinterpret_cast(src)[6]; + uint64_t h = reinterpret_cast(src)[7]; + reinterpret_cast(dest)[0] = a; + reinterpret_cast(dest)[1] = b; + reinterpret_cast(dest)[2] = c; + reinterpret_cast(dest)[3] = d; + reinterpret_cast(dest)[4] = e; + reinterpret_cast(dest)[5] = f; + reinterpret_cast(dest)[6] = g; + reinterpret_cast(dest)[7] = h; +#endif +} +template<> +ZT_INLINE void copy<32>(void *const dest,const void *const src) noexcept +{ +#ifdef ZT_ARCH_X64 + __m128i a = _mm_loadu_si128(reinterpret_cast(src)); + __m128i b = _mm_loadu_si128(reinterpret_cast(src) + 1); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest),a); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,b); +#else + uint64_t a = reinterpret_cast(src)[0]; + uint64_t b = reinterpret_cast(src)[1]; + uint64_t c = reinterpret_cast(src)[2]; + uint64_t d = reinterpret_cast(src)[3]; + reinterpret_cast(dest)[0] = a; + reinterpret_cast(dest)[1] = b; + reinterpret_cast(dest)[2] = c; + reinterpret_cast(dest)[3] = d; +#endif +} +template<> +ZT_INLINE void copy<16>(void *const dest,const void *const src) noexcept +{ +#ifdef ZT_ARCH_X64 + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest),_mm_loadu_si128(reinterpret_cast(src))); +#else + uint64_t a = reinterpret_cast(src)[0]; + uint64_t b = reinterpret_cast(src)[1]; + reinterpret_cast(dest)[0] = a; + reinterpret_cast(dest)[1] = b; +#endif +} +template<> +ZT_INLINE void copy<8>(void *const dest,const void *const src) noexcept +{ + *reinterpret_cast(dest) = *reinterpret_cast(src); +} +template<> +ZT_INLINE void copy<4>(void *const dest,const void *const src) noexcept +{ + *reinterpret_cast(dest) = *reinterpret_cast(src); +} +template<> +ZT_INLINE void copy<2>(void *const dest,const void *const src) noexcept +{ + *reinterpret_cast(dest) = *reinterpret_cast(src); +} +template<> +ZT_INLINE void copy<1>(void *const dest,const void *const src) noexcept +{ + *reinterpret_cast(dest) = *reinterpret_cast(src); +} +template<> +ZT_INLINE void copy<0>(void *const dest,const void *const src) noexcept +{ +} +template +static ZT_INLINE void copy(void *const dest,const void *const src) noexcept +{ +#ifdef ZT_NO_UNALIGNED_ACCESS + if ((((uintptr_t)dest | (uintptr_t)src) & 7U) != 0) { + memcpy(dest,src,L); + return; + } +#endif + + uint8_t *d = reinterpret_cast(dest); + const uint8_t *s = reinterpret_cast(src); + + for(unsigned int i=0;i<(L / 64U);++i) { + copy<64>(d,s); + d += 64; + s += 64; + } + if ((L & 63U) >= 32U) { + copy<32>(d,s); + d += 32; + s += 32; + } + if ((L & 31U) >= 16U) { + copy<16>(d,s); + d += 16; + s += 16; + } + if ((L & 15U) >= 8U) { + copy<8>(d,s); + d += 8; + s += 8; + } + if ((L & 7U) >= 4U) { + copy<4>(d,s); + d += 4; + s += 4; + } + if ((L & 3U) >= 2U) { + copy<2>(d,s); + d += 2; + s += 2; + } + if ((L & 1U) != 0U) { + copy<1>(d,s); + } +} +static ZT_INLINE void copy(void *const dest,const void *const src,const unsigned int len) noexcept +{ + memcpy(dest,src,len); +} + +template +static ZT_INLINE void zero(void *dest) noexcept; +template<> +ZT_INLINE void zero<64>(void *const dest) noexcept +{ +#ifdef ZT_ARCH_X64 + const __m128i z = _mm_setzero_si128(); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest),z); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,z); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 2,z); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 3,z); +#else + const uint64_t z = 0; + reinterpret_cast(dest)[0] = z; + reinterpret_cast(dest)[1] = z; + reinterpret_cast(dest)[2] = z; + reinterpret_cast(dest)[3] = z; + reinterpret_cast(dest)[4] = z; + reinterpret_cast(dest)[5] = z; + reinterpret_cast(dest)[6] = z; + reinterpret_cast(dest)[7] = z; +#endif +} +template<> +ZT_INLINE void zero<32>(void *const dest) noexcept +{ +#ifdef ZT_ARCH_X64 + const __m128i z = _mm_setzero_si128(); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest),z); + _mm_storeu_si128(reinterpret_cast<__m128i *>(dest) + 1,z); +#else + const uint64_t z = 0; + reinterpret_cast(dest)[0] = z; + reinterpret_cast(dest)[1] = z; + reinterpret_cast(dest)[2] = z; + reinterpret_cast(dest)[3] = z; +#endif +} +template<> +ZT_INLINE void zero<16>(void *const dest) noexcept +{ + const uint64_t z = 0; + reinterpret_cast(dest)[0] = z; + reinterpret_cast(dest)[1] = z; +} +template<> +ZT_INLINE void zero<8>(void *const dest) noexcept +{ + *reinterpret_cast(dest) = 0; +} +template<> +ZT_INLINE void zero<4>(void *const dest) noexcept +{ + *reinterpret_cast(dest) = 0; +} +template<> +ZT_INLINE void zero<2>(void *const dest) noexcept +{ + *reinterpret_cast(dest) = 0; +} +template<> +ZT_INLINE void zero<1>(void *const dest) noexcept +{ + *reinterpret_cast(dest) = 0; +} +template<> +ZT_INLINE void zero<0>(void *const dest) noexcept +{ +} +template +static ZT_INLINE void zero(void *const dest) noexcept +{ +#ifdef ZT_NO_UNALIGNED_ACCESS + if ((((uintptr_t)dest | (uintptr_t)src) & 7U) != 0) { + memset(dest,0,L); + return; + } +#endif + + uint8_t *d = reinterpret_cast(dest); + + for(unsigned int i=0;i<(L / 64U);++i) { + zero<64>(d); + d += 64; + } + if ((L & 63U) >= 32U) { + zero<32>(d); + d += 32; + } + if ((L & 31U) >= 16U) { + zero<16>(d); + d += 16; + } + if ((L & 15U) >= 8U) { + zero<8>(d); + d += 8; + } + if ((L & 7U) >= 4U) { + zero<4>(d); + d += 4; + } + if ((L & 3U) >= 2U) { + zero<2>(d); + d += 2; + } + if ((L & 1U) != 0U) { + zero<1>(d); + } +} +static ZT_INLINE void zero(void *const dest,const unsigned int len) noexcept +{ + memset(dest,0,len); +} + } // namespace Utils } // namespace ZeroTier diff --git a/node/VL1.cpp b/node/VL1.cpp index 125693e81..19a32be91 100644 --- a/node/VL1.cpp +++ b/node/VL1.cpp @@ -275,7 +275,7 @@ void VL1::onRemotePacket(void *const tPtr,const int64_t localSocket,const InetAd // Simultaneously decrypt and assemble packet into a contiguous buffer. // Since we moved data around above all slices will have sizes that are // multiples of 64. - memcpy(pkt.b->unsafeData,ph,sizeof(Protocol::Header)); + Utils::copy(pkt.b->unsafeData,ph); pkt.e = sizeof(Protocol::Header); for(FCV::iterator s(pktv.begin());s!=pktv.end();++s) { const unsigned int sliceSize = s->e - s->s; @@ -508,7 +508,7 @@ bool VL1::_HELLO(void *tPtr,const SharedPtr &path,SharedPtr &peer,Bu uint8_t key[ZT_PEER_SECRET_KEY_LENGTH]; if ((peer) && (id == peer->identity())) { - memcpy(key,peer->key(),ZT_PEER_SECRET_KEY_LENGTH); + Utils::copy(key,peer->key()); } else { peer.zero(); if (!RR->identity.agree(id,key)) {