diff --git a/aes-armv4.S b/aes-armv4.S
index 69cefb79..9dd3fb3d 100644
--- a/aes-armv4.S
+++ b/aes-armv4.S
@@ -32,6 +32,9 @@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~21.5 cycles per byte.
+@ JW, JUL 2018: Begin defines taken from arm_arch.h
+@ The defines were included through the header.
+
# if !defined(__ARM_ARCH__)
# if defined(__CC_ARM)
# define __ARM_ARCH__ __TARGET_ARCH_ARM
@@ -86,6 +89,8 @@
# endif
# endif
+@ JW, JUL 2018: End defines taken from arm_arch.h
+@ Back to original Cryptogams code
.text
#if defined(__thumb2__) && !defined(__APPLE__)
diff --git a/rijndael.cpp b/rijndael.cpp
index 1553e02e..284135ce 100644
--- a/rijndael.cpp
+++ b/rijndael.cpp
@@ -253,6 +253,10 @@ ANONYMOUS_NAMESPACE_END
unsigned int Rijndael::Base::OptimalDataAlignment() const
{
// CFB mode performs an extra memcpy if buffer is not aligned.
+#if (CRYPTOPP_AESNI_AVAILABLE)
+ if (HasAESNI())
+ return 1;
+#endif
#if (CRYPTOPP_ARM_AES_AVAILABLE)
if (HasAES())
return 1;
diff --git a/strciphr.cpp b/strciphr.cpp
index 73d7d698..3f3bbc10 100644
--- a/strciphr.cpp
+++ b/strciphr.cpp
@@ -195,6 +195,8 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString
if (!length) {return;}
}
+ // TODO: Figure out what is happening on ARM A-32. x86, AArch64 and PowerPC are OK.
+#if !defined(__arm__)
if (policy.CanIterate() && length >= bytesPerIteration && IsAlignedOn(outString, alignment))
{
const CipherDir cipherDir = GetCipherDir(*this);
@@ -202,7 +204,9 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString
policy.Iterate(outString, inString, cipherDir, length / bytesPerIteration);
else
{
- // GCC and Clang does not like this on ARM.
+ // GCC and Clang do not like this on ARM. If we create
+ // an aligned temp input buffer, memcpy inString to it,
+ // and then use the temp input, things are [mostly] OK.
memcpy(outString, inString, length);
policy.Iterate(outString, outString, cipherDir, length / bytesPerIteration);
}
@@ -210,6 +214,7 @@ void CFB_CipherTemplate::ProcessData(byte *outString, const byte *inString
outString = PtrAdd(outString, length - length % bytesPerIteration);
length %= bytesPerIteration;
}
+#endif
while (length >= bytesPerIteration)
{