diff --git a/adv-simd.h b/adv-simd.h index 5fa0313a..657c1b90 100644 --- a/adv-simd.h +++ b/adv-simd.h @@ -1266,6 +1266,7 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su CRYPTOPP_ASSERT(length >= 16); const ptrdiff_t blockSize = 16; + // const ptrdiff_t vexBlockSize = 16; ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize; ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0; diff --git a/ppc-simd.h b/ppc-simd.h index 9405ff72..0a02485d 100644 --- a/ppc-simd.h +++ b/ppc-simd.h @@ -17,6 +17,7 @@ #define CRYPTOPP_PPC_CRYPTO_H #include "config.h" +#include "misc.h" #if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING) # include <altivec.h> @@ -44,11 +45,19 @@ typedef __vector unsigned long long uint64x2_p; inline uint32x4_p VectorLoad(const byte src[16]) { - // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf - const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src); - const uint8x16_p low = vec_ld(0, (uint8_t*)src); - const uint8x16_p high = vec_ld(15, (uint8_t*)src); - const uint8x16_p data = vec_perm(low, high, perm); + uint8x16_p data; + if (IsAlignedOn(src, 16)) + { + data = vec_ld(0, (uint8_t*)src); + } + else + { + // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf + const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src); + const uint8x16_p low = vec_ld(0, (uint8_t*)src); + const uint8x16_p high = vec_ld(15, (uint8_t*)src); + data = vec_perm(low, high, perm); + } #if defined(CRYPTOPP_BIG_ENDIAN) return (uint32x4_p)data; @@ -67,16 +76,23 @@ inline void VectorStore(const uint32x4_p data, byte dest[16]) const uint8x16_p t1 = (uint8x16_p)data; #endif - // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf - const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest)); - vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest); - vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest); - vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest); - vec_ste((uint32x4_p) 
t2, 4, (unsigned int*) dest); - vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest); - vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest); - vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest); - vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest); + if (IsAlignedOn(dest, 16)) + { + vec_st(t1, 0, (uint8_t*) dest); + } + else + { + // http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf + const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest)); + vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest); + vec_ste((uint16x8_p) t2, 1, (unsigned short*)dest); + vec_ste((uint32x4_p) t2, 3, (unsigned int*) dest); + vec_ste((uint32x4_p) t2, 4, (unsigned int*) dest); + vec_ste((uint32x4_p) t2, 8, (unsigned int*) dest); + vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest); + vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest); + vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest); + } } inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2)