Mirror of https://github.com/zerotier/ZeroTierOne.git, synced 2025-06-05 20:13:44 +02:00
Mirror over a few more AES fixes.

commit ec76f6e1d2 (parent c943d0e807)
3 changed files with 36 additions and 25 deletions
core/AES.cpp | 22
@@ -149,22 +149,12 @@ void AES::GMAC::update(const void *const data, unsigned int len) noexcept
 		}
 	}

-	if (likely(((uintptr_t)in & 7U) == 0U)) {
-		while (len >= 16) {
-			y0 ^= *reinterpret_cast<const uint64_t *>(in);
-			y1 ^= *reinterpret_cast<const uint64_t *>(in + 8);
-			in += 16;
-			s_gfmul(h0, h1, y0, y1);
-			len -= 16;
-		}
-	} else {
-		while (len >= 16) {
-			y0 ^= Utils::loadMachineEndian< uint64_t >(in);
-			y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8);
-			in += 16;
-			s_gfmul(h0, h1, y0, y1);
-			len -= 16;
-		}
-	}
+	while (len >= 16) {
+		y0 ^= Utils::loadMachineEndian< uint64_t >(in);
+		y1 ^= Utils::loadMachineEndian< uint64_t >(in + 8);
+		in += 16;
+		s_gfmul(h0, h1, y0, y1);
+		len -= 16;
+	}

 	_y[0] = y0;
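The core/AES.cpp hunk drops GMAC::update's aligned-input fast path: rather than branching on pointer alignment and reading through a reinterpret_cast<const uint64_t *> when the input happens to be 8-byte aligned, the loop now always uses Utils::loadMachineEndian. The cast-and-dereference load is an aliasing/alignment hazard in portable C++, and a memcpy-style load helper optimizes to the same single instruction anyway, which is presumably why the branch was removed. A minimal sketch of such a helper (illustrative; ZeroTier's actual Utils::loadMachineEndian may be implemented differently):

#include <cstdint>
#include <cstring>

// Native-endian unaligned load expressed via memcpy: well defined for any
// address, and optimizing compilers lower it to a single load instruction
// on architectures that permit unaligned access.
template <typename I>
static inline I loadMachineEndian(const void *const p) noexcept
{
	I tmp;
	std::memcpy(&tmp, p, sizeof(I));
	return tmp;
}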
@@ -26,7 +26,9 @@ namespace {

 const __m128i s_sseSwapBytes = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul")))
+#endif
 __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept
 {
 	y = _mm_shuffle_epi8(y, s_sseSwapBytes);
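This hunk and every remaining hunk in this file make the same mechanical change: each per-function __attribute__((__target__(...))) is wrapped in #ifdef __GNUC__ / #endif. The target attribute is a GCC/Clang extension that turns on extra ISA features (SSSE3, PCLMUL, AES, and so on) for a single function while the rest of the file is compiled for the baseline CPU; compilers that do not define __GNUC__ (MSVC, for example, which permits the intrinsics without per-function flags) would not accept the attribute, so it is now compiled out for them. A small self-contained illustration of the guarded pattern (the helper name is hypothetical):

#include <immintrin.h>

// Hypothetical helper showing the guarded per-function target pattern.
// On GCC/Clang the attribute enables PCLMULQDQ for this function only;
// elsewhere the attribute disappears and the intrinsic is used directly.
#ifdef __GNUC__
__attribute__((__target__("pclmul")))
#endif
static __m128i clmulLow64(const __m128i a, const __m128i b) noexcept
{
	// Carry-less multiply of the two low 64-bit lanes, the primitive that
	// GHASH/GMAC multiplication is built from.
	return _mm_clmulepi64_si128(a, b, 0x00);
}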
@@ -57,7 +59,9 @@ __m128i p_gmacPCLMUL128(const __m128i h, __m128i y) noexcept

 #define ZT_AES_VAES512 1

+#ifdef __GNUC__
 __attribute__((__target__("sse4,aes,avx,avx2,vaes,avx512f,avx512bw")))
+#endif
 void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept
 {
 	const __m512i kk0 = _mm512_broadcast_i32x4(k[0]);
@@ -107,7 +111,9 @@ void p_aesCtrInnerVAES512(unsigned int &len, const uint64_t c0, uint64_t &c1, co

 #define ZT_AES_VAES256 1

+#ifdef __GNUC__
 __attribute__((__target__("sse4,aes,avx,avx2,vaes")))
+#endif
 void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, const uint8_t *&in, uint8_t *&out, const __m128i *const k) noexcept
 {
 	const __m256i kk0 = _mm256_broadcastsi128_si256(k[0]);
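The two functions guarded above are the wide CTR inner loops: p_aesCtrInnerVAES512 broadcasts each 128-bit round key into all four lanes of a 512-bit register, and p_aesCtrInnerVAES256 into both lanes of a 256-bit one, so a single VAES instruction performs one AES round across four (or two) counter blocks at once. A rough sketch of one such round, assuming AVX-512F plus VAES (the function name and shape are illustrative, not ZeroTier's actual loop):

#include <immintrin.h>

// One AES round over four independent 128-bit blocks packed into a ZMM
// register, in the style of the VAES512 path above.
#ifdef __GNUC__
__attribute__((__target__("avx512f,vaes")))
#endif
static __m512i aesRoundX4(const __m512i blocks4, const __m128i roundKey) noexcept
{
	// Replicate the 128-bit round key into all four lanes, then run the
	// round on all four blocks with a single VAES instruction.
	return _mm512_aesenc_epi128(blocks4, _mm512_broadcast_i32x4(roundKey));
}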
@@ -175,7 +181,9 @@ void p_aesCtrInnerVAES256(unsigned int &len, const uint64_t c0, uint64_t &c1, co

 #endif // does compiler support AVX2 and AVX512 AES intrinsics?

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
+#endif
 __m128i p_init256_1_aesni(__m128i a, __m128i b) noexcept
 {
 	__m128i x, y;
@@ -190,7 +198,9 @@ __m128i p_init256_1_aesni(__m128i a, __m128i b) noexcept
 	return x;
 }

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
+#endif
 __m128i p_init256_2_aesni(__m128i a, __m128i b) noexcept
 {
 	__m128i x, y, z;
@@ -208,7 +218,9 @@ __m128i p_init256_2_aesni(__m128i a, __m128i b) noexcept

 } // anonymous namespace

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul")))
+#endif
 void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
 {
 	__m128i y = _mm_loadu_si128(reinterpret_cast<const __m128i *>(_y));
@@ -274,7 +286,9 @@ void AES::GMAC::p_aesNIUpdate(const uint8_t *in, unsigned int len) noexcept
 	_rp = len; // len is always less than 16 here
 }

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,pclmul,aes")))
+#endif
 void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept
 {
 	__m128i y = _mm_loadu_si128(reinterpret_cast<const __m128i *>(_y));
@@ -345,7 +359,9 @@ void AES::GMAC::p_aesNIFinish(uint8_t tag[16]) noexcept
 	_mm_storeu_si128(reinterpret_cast<__m128i *>(tag), _mm_xor_si128(_mm_shuffle_epi8(t4, s_sseSwapBytes), encIV));
 }

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes")))
+#endif
 void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) noexcept
 {
 	const __m128i dd = _mm_set_epi64x(0, (long long)_ctr[0]);
@@ -542,7 +558,9 @@ void AES::CTR::p_aesNICrypt(const uint8_t *in, uint8_t *out, unsigned int len) n
 	_ctr[1] = Utils::hton(c1);
 }

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
+#endif
 void AES::p_init_aesni(const uint8_t *key) noexcept
 {
 	__m128i t1, t2, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13;
@@ -604,7 +622,9 @@ void AES::p_init_aesni(const uint8_t *key) noexcept
 	p_k.ni.h2[3] = _mm_xor_si128(_mm_shuffle_epi32(hhhh, 78), hhhh);
 }

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
+#endif
 void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept
 {
 	__m128i tmp = _mm_loadu_si128((const __m128i *)in);
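For context on the function guarded just above: p_encrypt_aesni is the standard AES-NI round ladder, one _mm_aesenc_si128 per middle round and a final _mm_aesenclast_si128 with p_k.ni.k[14] (AES-256 has 14 rounds). A minimal single-block version, written for AES-128's ten rounds to keep it short (names are illustrative, not ZeroTier's):

#include <immintrin.h>

// Encrypt one block given an expanded AES-128 key schedule (11 round keys).
#ifdef __GNUC__
__attribute__((__target__("sse2,aes")))
#endif
static __m128i encryptBlockAES128(__m128i block, const __m128i rk[11]) noexcept
{
	block = _mm_xor_si128(block, rk[0]);        // initial AddRoundKey
	for (int r = 1; r < 10; ++r)
		block = _mm_aesenc_si128(block, rk[r]); // SubBytes, ShiftRows, MixColumns, AddRoundKey
	return _mm_aesenclast_si128(block, rk[10]); // final round omits MixColumns
}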
@@ -625,7 +645,9 @@ void AES::p_encrypt_aesni(const void *const in, void *const out) const noexcept
 	_mm_storeu_si128((__m128i *)out, _mm_aesenclast_si128(tmp, p_k.ni.k[14]));
 }

+#ifdef __GNUC__
 __attribute__((__target__("ssse3,sse4,sse4.1,sse4.2,aes,pclmul")))
+#endif
 void AES::p_decrypt_aesni(const void *in, void *out) const noexcept
 {
 	__m128i tmp = _mm_loadu_si128((const __m128i *)in);
@@ -131,7 +131,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 	uint8x16_t k14 = _aes.p_k.neon.ek[14];

 	unsigned int totalLen = _len;
-	if ((totalLen & 15U)) {
+	if ((totalLen & 15U) != 0) {
 		for (;;) {
 			if (unlikely(!len)) {
 				vst1q_u8(reinterpret_cast<uint8_t *>(_ctr), vrev32q_u8(dd));
@@ -140,7 +140,7 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			}
 			--len;
 			out[totalLen++] = *(in++);
-			if (!(totalLen & 15U)) {
+			if ((totalLen & 15U) == 0) {
 				uint8_t *const otmp = out + (totalLen - 16);
 				uint8x16_t d0 = vrev32q_u8(dd);
 				uint8x16_t pt = vld1q_u8(otmp);
@@ -180,7 +180,10 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			uint8x16_t d2 = vrev32q_u8(dd2);
 			uint8x16_t d3 = vrev32q_u8(dd3);
 			uint8x16_t pt0 = vld1q_u8(in);
-			in += 16;
+			uint8x16_t pt1 = vld1q_u8(in + 16);
+			uint8x16_t pt2 = vld1q_u8(in + 32);
+			uint8x16_t pt3 = vld1q_u8(in + 48);
+
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k0));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k0));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k0));
@@ -193,8 +196,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k2));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k2));
 			d3 = vaesmcq_u8(vaeseq_u8(d3, k2));
-			uint8x16_t pt1 = vld1q_u8(in);
-			in += 16;
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k3));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k3));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k3));
@@ -207,8 +208,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k5));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k5));
 			d3 = vaesmcq_u8(vaeseq_u8(d3, k5));
-			uint8x16_t pt2 = vld1q_u8(in);
-			in += 16;
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k6));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k6));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k6));
@@ -221,8 +220,6 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k8));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k8));
 			d3 = vaesmcq_u8(vaeseq_u8(d3, k8));
-			uint8x16_t pt3 = vld1q_u8(in);
-			in += 16;
 			d0 = vaesmcq_u8(vaeseq_u8(d0, k9));
 			d1 = vaesmcq_u8(vaeseq_u8(d1, k9));
 			d2 = vaesmcq_u8(vaeseq_u8(d2, k9));
@@ -253,7 +250,9 @@ void AES::CTR::p_armCrypt(const uint8_t *in, uint8_t *out, unsigned int len) noe
 			vst1q_u8(out + 16, d1);
 			vst1q_u8(out + 32, d2);
 			vst1q_u8(out + 48, d3);

 			out += 64;
+			in += 64;
+
 			dd = (uint8x16_t)vaddq_u32((uint32x4_t)dd, four);
 			if (unlikely(len < 64))
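The ARM hunks make two kinds of change. The first two rewrite the tail-handling tests as explicit comparisons ((totalLen & 15U) != 0 and == 0) with no change in behavior. The rest restructure the four-block NEON loop: all four plaintext blocks are now loaded up front with offset addressing, the loads that were interleaved between AES rounds are gone, and the three scattered in += 16 increments collapse into a single in += 64 per iteration, shortening the dependency chain on the input pointer. A sketch of the resulting data flow (illustrative; the real function runs the full 14-round AES-256 schedule between the loads and the stores):

#include <arm_neon.h>

// New shape of one four-block CTR iteration: load everything first, XOR in
// the keystream (ks = the four AES-encrypted counter blocks), then advance
// each pointer exactly once.
static void ctrXor4Blocks(const uint8_t *&in, uint8_t *&out, const uint8x16_t ks[4]) noexcept
{
	const uint8x16_t pt0 = vld1q_u8(in);
	const uint8x16_t pt1 = vld1q_u8(in + 16);
	const uint8x16_t pt2 = vld1q_u8(in + 32);
	const uint8x16_t pt3 = vld1q_u8(in + 48);

	vst1q_u8(out, veorq_u8(ks[0], pt0));
	vst1q_u8(out + 16, veorq_u8(ks[1], pt1));
	vst1q_u8(out + 32, veorq_u8(ks[2], pt2));
	vst1q_u8(out + 48, veorq_u8(ks[3], pt3));

	in += 64;
	out += 64;
}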