Add Power4 unaligned Load and Store
parent
bfc2b5f02d
commit
4f2c605209
|
|
@ -1266,6 +1266,7 @@ size_t AdvancedProcessBlocks128_6x1_ALTIVEC(F1 func1, F6 func6, const word32 *su
|
||||||
CRYPTOPP_ASSERT(length >= 16);
|
CRYPTOPP_ASSERT(length >= 16);
|
||||||
|
|
||||||
const ptrdiff_t blockSize = 16;
|
const ptrdiff_t blockSize = 16;
|
||||||
|
// const ptrdiff_t vexBlockSize = 16;
|
||||||
|
|
||||||
ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
|
ptrdiff_t inIncrement = (flags & (BT_InBlockIsCounter|BT_DontIncrementInOutPointers)) ? 0 : blockSize;
|
||||||
ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
|
ptrdiff_t xorIncrement = xorBlocks ? blockSize : 0;
|
||||||
|
|
|
||||||
18
ppc-simd.h
18
ppc-simd.h
|
|
@ -17,6 +17,7 @@
|
||||||
#define CRYPTOPP_PPC_CRYPTO_H
|
#define CRYPTOPP_PPC_CRYPTO_H
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include "misc.h"
|
||||||
|
|
||||||
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
#if defined(CRYPTOPP_ALTIVEC_AVAILABLE) || defined(CRYPTOPP_DOXYGEN_PROCESSING)
|
||||||
# include <altivec.h>
|
# include <altivec.h>
|
||||||
|
|
@ -44,11 +45,19 @@ typedef __vector unsigned long long uint64x2_p;
|
||||||
|
|
||||||
inline uint32x4_p VectorLoad(const byte src[16])
|
inline uint32x4_p VectorLoad(const byte src[16])
|
||||||
{
|
{
|
||||||
|
uint8x16_p data;
|
||||||
|
if (IsAlignedOn(src, 16))
|
||||||
|
{
|
||||||
|
data = vec_ld(0, (uint8_t*)src);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
||||||
const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src);
|
const uint8x16_p perm = vec_lvsl(0, (uint8_t*)src);
|
||||||
const uint8x16_p low = vec_ld(0, (uint8_t*)src);
|
const uint8x16_p low = vec_ld(0, (uint8_t*)src);
|
||||||
const uint8x16_p high = vec_ld(15, (uint8_t*)src);
|
const uint8x16_p high = vec_ld(15, (uint8_t*)src);
|
||||||
const uint8x16_p data = vec_perm(low, high, perm);
|
data = vec_perm(low, high, perm);
|
||||||
|
}
|
||||||
|
|
||||||
#if defined(CRYPTOPP_BIG_ENDIAN)
|
#if defined(CRYPTOPP_BIG_ENDIAN)
|
||||||
return (uint32x4_p)data;
|
return (uint32x4_p)data;
|
||||||
|
|
@ -67,6 +76,12 @@ inline void VectorStore(const uint32x4_p data, byte dest[16])
|
||||||
const uint8x16_p t1 = (uint8x16_p)data;
|
const uint8x16_p t1 = (uint8x16_p)data;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
if (IsAlignedOn(dest, 16))
|
||||||
|
{
|
||||||
|
vec_st(t1, 0, (uint8_t*) dest);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
// http://www.nxp.com/docs/en/reference-manual/ALTIVECPEM.pdf
|
||||||
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
|
const uint8x16_p t2 = vec_perm(t1, t1, vec_lvsr(0, dest));
|
||||||
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest);
|
vec_ste((uint8x16_p) t2, 0, (unsigned char*) dest);
|
||||||
|
|
@ -77,6 +92,7 @@ inline void VectorStore(const uint32x4_p data, byte dest[16])
|
||||||
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest);
|
vec_ste((uint32x4_p) t2, 12, (unsigned int*) dest);
|
||||||
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
|
vec_ste((uint16x8_p) t2, 14, (unsigned short*)dest);
|
||||||
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
|
vec_ste((uint8x16_p) t2, 15, (unsigned char*) dest);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2)
|
inline uint32x4_p VectorXor(const uint32x4_p vec1, const uint32x4_p vec2)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue