From 9a501a76d191242cd247267df6bdf724ab491b32 Mon Sep 17 00:00:00 2001 From: Adam Ierymenko Date: Tue, 28 Jul 2020 20:44:19 -0700 Subject: [PATCH] Starting on AES ARM intrinsics work. --- core/AES.cpp | 44 ++++++++++++++++++++++++++++++++++++++++++++ core/AES.hpp | 23 +++++++++++++++++++++++ core/OS.hpp | 6 ++++++ 3 files changed, 73 insertions(+) diff --git a/core/AES.cpp b/core/AES.cpp index 2ced6f5f9..d34b524d3 100644 --- a/core/AES.cpp +++ b/core/AES.cpp @@ -1257,4 +1257,48 @@ void AES::_decrypt_aesni(const void *in, void *out) const noexcept #endif // ZT_AES_AESNI +#ifdef ZT_ARCH_ARM_HAS_NEON + +void AES::_encrypt_armneon_crypto(const void *const in, void *const out) const noexcept +{ + uint8x16_t tmp = vld1q_u8(reinterpret_cast(in)); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[0])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[1])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[2])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[3])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[4])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[5])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[6])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[7])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[8])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[9])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[10])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[11])); + tmp = vaesmcq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[12])); + tmp = veorq_u8(vaeseq_u8(tmp, reinterpret_cast(_k.sw.ek)[13]), reinterpret_cast(_k.sw.ek)[14]); + vst1q_u8(reinterpret_cast(out), tmp); +} + +void AES::_decrypt_armneon_crypto(const void *const in, void *const out) const noexcept +{ + uint8x16_t tmp = vld1q_u8(reinterpret_cast(in)); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[0])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[1])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[2])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[3])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[4])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[5])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[6])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[7])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[8])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[9])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[10])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[11])); + tmp = vaesimcq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[12])); + tmp = veorq_u8(vaesdq_u8(tmp, reinterpret_cast(_k.sw.dk)[13]), reinterpret_cast(_k.sw.dk)[14]); + vst1q_u8(reinterpret_cast(out), tmp); +} + +#endif // ZT_ARCH_ARM_HAS_NEON + } // namespace ZeroTier diff --git a/core/AES.hpp b/core/AES.hpp index 44143c20a..70388253a 100644 --- a/core/AES.hpp +++ b/core/AES.hpp @@ -91,6 +91,12 @@ public: _encrypt_aesni(in, out); return; } +#endif +#ifdef ZT_ARCH_ARM_HAS_NEON + if (true) { + _encrypt_armneon_crypto(in, out); + return; + } #endif _encryptSW(reinterpret_cast(in), reinterpret_cast(out)); } @@ -108,6 +114,12 @@ public: _decrypt_aesni(in, out); return; } +#endif +#ifdef ZT_ARCH_ARM_HAS_NEON + if (true) { + _decrypt_armneon_crypto(in, out); + return; + } #endif _decryptSW(reinterpret_cast(in), reinterpret_cast(out)); } @@ -506,7 +518,13 @@ private: struct { uint64_t h[2]; +#if defined(ZT_ARCH_ARM_HAS_NEON) && !defined(_MSC_VER) && !defined(ZT_AES_NO_ACCEL) + __attribute__((aligned(16))) +#endif uint32_t ek[60]; +#if defined(ZT_ARCH_ARM_HAS_NEON) && !defined(_MSC_VER) && !defined(ZT_AES_NO_ACCEL) + __attribute__((aligned(16))) +#endif uint32_t dk[60]; } sw; } _k; @@ -516,6 +534,11 @@ private: void _encrypt_aesni(const void *const in, void *const out) const noexcept; void _decrypt_aesni(const void *in, void *out) const noexcept; #endif + +#ifdef ZT_ARCH_ARM_HAS_NEON + void _encrypt_armneon_crypto(const void *const in, void *const out) const noexcept; + void _decrypt_armneon_crypto(const void *const in, void *const out) const noexcept; +#endif }; } // namespace ZeroTier diff --git a/core/OS.hpp b/core/OS.hpp index 952c73508..111d72625 100644 --- a/core/OS.hpp +++ b/core/OS.hpp @@ -115,6 +115,12 @@ #include #endif +#if (defined(__ARM_NEON) || defined(__ARM_NEON__)) +#define ZT_ARCH_ARM_HAS_NEON 1 +#include +/*#include */ +#endif + #if defined(ZT_ARCH_X64) || defined(i386) || defined(__i386) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) || defined(_M_IX86) || defined(__X86__) || defined(_X86_) || defined(__I86__) || defined(__INTEL__) || defined(__386) #define ZT_ARCH_X86 1 #endif