Updated comments
parent
dabad4b409
commit
d9592a303c
|
|
@ -1,10 +1,11 @@
|
||||||
// rijndael-simd.cpp - written and placed in the public domain by
|
// rijndael-simd.cpp - written and placed in the public domain by
|
||||||
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
|
// Jeffrey Walton, Uri Blumenthal and Marcel Raad.
|
||||||
|
// AES-NI code originally written by Wei Dai.
|
||||||
//
|
//
|
||||||
// This source file uses intrinsics to gain access to AES-NI and
|
// This source file uses intrinsics and built-ins to gain access to
|
||||||
// ARMv8a AES instructions. A separate source file is needed
|
// AES-NI, ARMv8a AES and Power8 AES instructions. A separate source
|
||||||
// because additional CXXFLAGS are required to enable the
|
// file is needed because additional CXXFLAGS are required to enable
|
||||||
// appropriate instruction sets in some build configurations.
|
// the appropriate instruction sets in some build configurations.
|
||||||
//
|
//
|
||||||
// ARMv8a AES code based on CriticalBlue code from Johannes Schneiders,
|
// ARMv8a AES code based on CriticalBlue code from Johannes Schneiders,
|
||||||
// Skip Hovsmith and Barry O'Rourke for the mbedTLS project. Stepping
|
// Skip Hovsmith and Barry O'Rourke for the mbedTLS project. Stepping
|
||||||
|
|
@ -13,13 +14,11 @@
|
||||||
//
|
//
|
||||||
// AltiVec and Power8 code based on http://github.com/noloader/AES-Intrinsics and
|
// AltiVec and Power8 code based on http://github.com/noloader/AES-Intrinsics and
|
||||||
// http://www.ibm.com/developerworks/library/se-power8-in-core-cryptography/
|
// http://www.ibm.com/developerworks/library/se-power8-in-core-cryptography/
|
||||||
// The IBM documentation absolutely sucks. Thanks to Andy Polyakov, Paul R and
|
// For Power8 do not remove the casts, even when const-ness is cast away. It causes
|
||||||
// Trudeaun for answering questions and filling the gaps in the IBM documentation.
|
// a 0.3 to 0.6 cpb (cycles per byte) drop in performance. The IBM documentation absolutely sucks.
|
||||||
|
// Thanks to Andy Polyakov, Paul R and Trudeaun for answering questions and filling
|
||||||
|
// the gaps in the IBM documentation.
|
||||||
//
|
//
|
||||||
// For Power8 do not remove the casts. Removing them causes a 0.3 to 0.6 cpb (cycles per byte) drop in performance.
|
|
||||||
// uint8x16_p8 r1 = (uint8x16_p8)VectorLoadKey((const uint8_t*)skptr);
|
|
||||||
// uint8x16_p8 r4 = (uint8x16_p8)VectorLoadKey((const uint8_t*)s_rcon[0]);
|
|
||||||
// uint8x16_p8 r5 = (uint8x16_p8)VectorLoadKey((const uint8_t*)s_mask);
|
|
||||||
|
|
||||||
#include "pch.h"
|
#include "pch.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
@ -891,7 +890,7 @@ static inline void Store64x2(const uint64x2_p8& src, uint8_t dest[16])
|
||||||
// Loads a mis-aligned byte array, performs an endian conversion.
|
// Loads a mis-aligned byte array, performs an endian conversion.
|
||||||
static inline VectorType VectorLoad(const byte src[16])
|
static inline VectorType VectorLoad(const byte src[16])
|
||||||
{
|
{
|
||||||
return (VectorType)Load8x16((uint8_t*)src);
|
return (VectorType)Load8x16(0, (uint8_t*)src);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Loads a mis-aligned byte array, performs an endian conversion.
|
// Loads a mis-aligned byte array, performs an endian conversion.
|
||||||
|
|
@ -1092,9 +1091,9 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
|
||||||
std::memcpy(rk, userKey, keyLen);
|
std::memcpy(rk, userKey, keyLen);
|
||||||
uint8_t* skptr = (uint8_t*)rk;
|
uint8_t* skptr = (uint8_t*)rk;
|
||||||
|
|
||||||
uint8x16_p8 r1 = (uint8x16_p8)VectorLoadKey((const uint8_t*)skptr);
|
uint8x16_p8 r1 = (uint8x16_p8)VectorLoadKey(skptr);
|
||||||
uint8x16_p8 r4 = (uint8x16_p8)VectorLoadKey((const uint8_t*)s_rcon[0]);
|
uint8x16_p8 r4 = (uint8x16_p8)VectorLoadKey(s_rcon[0]);
|
||||||
uint8x16_p8 r5 = (uint8x16_p8)VectorLoadKey((const uint8_t*)s_mask);
|
uint8x16_p8 r5 = (uint8x16_p8)VectorLoadKey(s_mask);
|
||||||
|
|
||||||
#if defined(IS_LITTLE_ENDIAN)
|
#if defined(IS_LITTLE_ENDIAN)
|
||||||
// Only the user key requires byte reversing.
|
// Only the user key requires byte reversing.
|
||||||
|
|
@ -1110,12 +1109,12 @@ void Rijndael_UncheckedSetKey_POWER8(const byte* userKey, size_t keyLen, word32*
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Round 9 using rcon=0x1b */
|
/* Round 9 using rcon=0x1b */
|
||||||
r4 = (uint8x16_p8)VectorLoadKey((const uint8_t*)s_rcon[1]);
|
r4 = (uint8x16_p8)VectorLoadKey(s_rcon[1]);
|
||||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
||||||
skptr += 16; VectorStore(r1, skptr);
|
skptr += 16; VectorStore(r1, skptr);
|
||||||
|
|
||||||
/* Round 10 using rcon=0x36 */
|
/* Round 10 using rcon=0x36 */
|
||||||
r4 = (uint8x16_p8)VectorLoadKey((const uint8_t*)s_rcon[2]);
|
r4 = (uint8x16_p8)VectorLoadKey(s_rcon[2]);
|
||||||
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
r1 = Rijndael_Subkey_POWER8(r1, r4, r5);
|
||||||
skptr += 16; VectorStore(r1, skptr);
|
skptr += 16; VectorStore(r1, skptr);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue