Starting on AES ARM intrinsics work.

This commit is contained in:
Adam Ierymenko 2020-07-28 20:44:19 -07:00
parent becc4aa8ea
commit 9a501a76d1
No known key found for this signature in database
GPG key ID: C8877CF2D7A5D7F3
3 changed files with 73 additions and 0 deletions

View file

@ -1257,4 +1257,48 @@ void AES::_decrypt_aesni(const void *in, void *out) const noexcept
#endif // ZT_AES_AESNI
#ifdef ZT_ARCH_ARM_HAS_NEON
void AES::_encrypt_armneon_crypto(const void *const in, void *const out) const noexcept
{
uint8x16_t tmp = vld1q_u8(reinterpret_cast<const uint8_t *>(in));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[0]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[1]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[2]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[3]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[4]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[5]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[6]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[7]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[8]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[9]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[10]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[11]));
tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[12]));
tmp = veorq_u8(vaeseq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[13]), reinterpret_cast<const uint8x16_t *>(_k.sw.ek)[14]);
vst1q_u8(reinterpret_cast<uint8_t *>(out), tmp);
}
void AES::_decrypt_armneon_crypto(const void *const in, void *const out) const noexcept
{
uint8x16_t tmp = vld1q_u8(reinterpret_cast<const uint8_t *>(in));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[0]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[1]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[2]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[3]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[4]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[5]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[6]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[7]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[8]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[9]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[10]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[11]));
tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[12]));
tmp = veorq_u8(vaesdq_u8(tmp, reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[13]), reinterpret_cast<const uint8x16_t *>(_k.sw.dk)[14]);
vst1q_u8(reinterpret_cast<uint8_t *>(out), tmp);
}
#endif // ZT_ARCH_ARM_HAS_NEON
} // namespace ZeroTier

View file

@ -91,6 +91,12 @@ public:
_encrypt_aesni(in, out);
return;
}
#endif
#ifdef ZT_ARCH_ARM_HAS_NEON
if (true) {
_encrypt_armneon_crypto(in, out);
return;
}
#endif
_encryptSW(reinterpret_cast<const uint8_t *>(in), reinterpret_cast<uint8_t *>(out));
}
@ -108,6 +114,12 @@ public:
_decrypt_aesni(in, out);
return;
}
#endif
#ifdef ZT_ARCH_ARM_HAS_NEON
if (true) {
_decrypt_armneon_crypto(in, out);
return;
}
#endif
_decryptSW(reinterpret_cast<const uint8_t *>(in), reinterpret_cast<uint8_t *>(out));
}
@ -506,7 +518,13 @@ private:
struct
{
uint64_t h[2];
#if defined(ZT_ARCH_ARM_HAS_NEON) && !defined(_MSC_VER) && !defined(ZT_AES_NO_ACCEL)
__attribute__((aligned(16)))
#endif
uint32_t ek[60];
#if defined(ZT_ARCH_ARM_HAS_NEON) && !defined(_MSC_VER) && !defined(ZT_AES_NO_ACCEL)
__attribute__((aligned(16)))
#endif
uint32_t dk[60];
} sw;
} _k;
@ -516,6 +534,11 @@ private:
void _encrypt_aesni(const void *const in, void *const out) const noexcept;
void _decrypt_aesni(const void *in, void *out) const noexcept;
#endif
#ifdef ZT_ARCH_ARM_HAS_NEON
void _encrypt_armneon_crypto(const void *const in, void *const out) const noexcept;
void _decrypt_armneon_crypto(const void *const in, void *const out) const noexcept;
#endif
};
} // namespace ZeroTier

View file

@ -115,6 +115,12 @@
#include <mmintrin.h>
#endif
#if (defined(__ARM_NEON) || defined(__ARM_NEON__))
#define ZT_ARCH_ARM_HAS_NEON 1
#include <arm_neon.h>
/*#include <arm_acle.h>*/
#endif
#if defined(ZT_ARCH_X64) || defined(i386) || defined(__i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86) || defined(__X86__) || defined(_X86_) || defined(__I86__) || defined(__INTEL__) || defined(__386)
#define ZT_ARCH_X86 1
#endif