Add ChaCha SSE2 implementation
Thanks to Jack Lloyd and Botan for allowing us to use the implementation. The numbers for SSE2 are very good. When compared with Salsa20 ASM the results are: * Salsa20 2.55 cpb; ChaCha/20 2.90 cpb * Salsa20/12 1.61 cpb; ChaCha/12 1.90 cpb * Salsa20/8 1.34 cpb; ChaCha/8 1.5 cpbpull/730/head
parent
35b874b527
commit
916c4484a2
|
|
@ -51,6 +51,7 @@ cbcmac.h
|
|||
ccm.cpp
|
||||
ccm.h
|
||||
chacha.cpp
|
||||
chacha-simd.cpp
|
||||
chacha.h
|
||||
cham.cpp
|
||||
cham-simd.cpp
|
||||
|
|
|
|||
|
|
@ -96,6 +96,28 @@ Ciphertext: \
|
|||
25DDA3E1C1E35D96BDB9CAD13546971B1E5FDB2E83216FEF93E5457DE48A5ED8 \
|
||||
1F7E4B95484834A58B6AF8CCE9545BBBDC58929A1DEB2F6AEBF0DC2079F644E3
|
||||
Test: Encrypt
|
||||
#
|
||||
Comment: All one's key and IV (16-byte key).
|
||||
Key: r16 ff
|
||||
IV: r8 ff
|
||||
Rounds: 8
|
||||
Seek: 32
|
||||
Plaintext: r64 00
|
||||
Ciphertext: \
|
||||
3E0FB640D76FF9C3B9CD99996E6E38FAD13F0E31C82244D33ABBC1B11E8BF12D \
|
||||
9A81D78E9E56604DDFAE136921F51C9D81AE15119DB8E756DD28024493EE571D
|
||||
Test: Encrypt
|
||||
#
|
||||
Comment: All one's key and IV (32-byte key).
|
||||
Key: r32 ff
|
||||
IV: r8 ff
|
||||
Rounds: 8
|
||||
Seek: 32
|
||||
Plaintext: r64 00
|
||||
Ciphertext: \
|
||||
445E0547D31C1623C537DF4BA85C70A9884A35BCBF3DFAB077E98B0F68135F54 \
|
||||
81D4933F8B322AC0CD762C27235CE2B31534E0244A9A2F1FD5E94498D47FF108
|
||||
Test: Encrypt
|
||||
|
||||
#################################################
|
||||
|
||||
|
|
@ -197,6 +219,28 @@ Ciphertext: \
|
|||
5E16F52D6857A1A602A7FC6DDD578CA868F1E51AADD3209034A4740036DE08A7 \
|
||||
A906067C997F01E4E334CBA913407C7A462A968B272834D2D66DF24922F4302C
|
||||
Test: Encrypt
|
||||
#
|
||||
Comment: All one's key and IV (16-byte key).
|
||||
Key: r16 ff
|
||||
IV: r8 ff
|
||||
Rounds: 12
|
||||
Seek: 32
|
||||
Plaintext: r64 00
|
||||
Ciphertext: \
|
||||
83D5597D7A616258EC3C5D5B30E1C5C85C5DFE2F92423B8E36870F3185B6ADD9 \
|
||||
F34DAB6C2BC551898FBDCDFC783F09171CC8B59A8B2852983C3A9B91D29B5761
|
||||
Test: Encrypt
|
||||
#
|
||||
Comment: All one's key and IV (32-byte key).
|
||||
Key: r32 ff
|
||||
IV: r8 ff
|
||||
Rounds: 12
|
||||
Seek: 32
|
||||
Plaintext: r64 00
|
||||
Ciphertext: \
|
||||
CAA69C5AB221A23A57EB5F345C96F4D1322D0A2FF7A9CD43401CD536639A615A \
|
||||
5C9429B55CA3C1B55354559669A154ACA46CD761C41AB8ACE385363B95675F06
|
||||
Test: Encrypt
|
||||
|
||||
#################################################
|
||||
|
||||
|
|
@ -298,3 +342,25 @@ Ciphertext: \
|
|||
4E4C2B336E6EE2F471823808523F073C1BC8785D258AC2BD580209A82A875273 \
|
||||
93DF828B6A6728ABD7AAD0485BFF5CE92C8DB78B1E63929FC76A905E8C7AF310
|
||||
Test: Encrypt
|
||||
#
|
||||
Comment: All one's key and IV (16-byte key).
|
||||
Key: r16 ff
|
||||
IV: r8 ff
|
||||
Rounds: 20
|
||||
Seek: 32
|
||||
Plaintext: r64 00
|
||||
Ciphertext: \
|
||||
399E4760B262F9D53F26D8DD19E56F5C506AE0C3619FA67FB0C408106D0203EE \
|
||||
40EA3CFA61FA32A2FDA8D1238A2135D9D4178775240F99007064A6A7F0C731B6
|
||||
Test: Encrypt
|
||||
#
|
||||
Comment: All one's key and IV (32-byte key).
|
||||
Key: r32 ff
|
||||
IV: r8 ff
|
||||
Rounds: 20
|
||||
Seek: 32
|
||||
Plaintext: r64 00
|
||||
Ciphertext: \
|
||||
3FEABC57FDE54F790C52C8AE43240B79D49042B777BFD6CB80E931270B7F50EB \
|
||||
5BAC2ACD86A836C5DC98C116C1217EC31D3A63A9451319F097F3B4D6DAB07787
|
||||
Test: Encrypt
|
||||
|
|
|
|||
|
|
@ -0,0 +1,289 @@
|
|||
// chacha-simd.cpp - written and placed in the public domain by
|
||||
// Jack Lloyd and Jeffrey Walton
|
||||
//
|
||||
// This source file uses intrinsics and built-ins to gain access to
|
||||
// SSE2, ARM NEON and ARMv8a, and Power7 Altivec instructions. A separate
|
||||
// source file is needed because additional CXXFLAGS are required to enable
|
||||
// the appropriate instructions sets in some build configurations.
|
||||
//
|
||||
// SSE2 implementation based on Botan's chacha_sse2.cpp. Many thanks
|
||||
// to Jack Lloyd and the Botan team for allowing us to use it.
|
||||
//
|
||||
// ARMv8 Power7 is upcoming.
|
||||
|
||||
#include "pch.h"
|
||||
#include "config.h"
|
||||
|
||||
#include "chacha.h"
|
||||
#include "misc.h"
|
||||
|
||||
#if (CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||
# include <xmmintrin.h>
|
||||
# include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
#if (CRYPTOPP_ARM_NEON_AVAILABLE) && 0
|
||||
# include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
// Can't use CRYPTOPP_ARM_XXX_AVAILABLE because too many
|
||||
// compilers don't follow ACLE conventions for the include.
|
||||
#if defined(CRYPTOPP_ARM_ACLE_AVAILABLE)
|
||||
# include <stdint.h>
|
||||
# include <arm_acle.h>
|
||||
#endif
|
||||
|
||||
// Squash MS LNK4221 and libtool warnings
|
||||
extern const char CHACHA_SIMD_FNAME[] = __FILE__;
|
||||
|
||||
ANONYMOUS_NAMESPACE_BEGIN
|
||||
|
||||
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||
|
||||
template <unsigned int R>
|
||||
inline __m128i RotateLeft(const __m128i val)
|
||||
{
|
||||
return _mm_or_si128(_mm_slli_epi32(val, R), _mm_srli_epi32(val, 32-R));
|
||||
}
|
||||
|
||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
|
||||
ANONYMOUS_NAMESPACE_END
|
||||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||
|
||||
void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
|
||||
const word32 *input, size_t iterationCount, unsigned int rounds)
|
||||
{
|
||||
const __m128i* input_mm = reinterpret_cast<const __m128i*>(input);
|
||||
__m128i* output_mm = reinterpret_cast<__m128i*>(output);
|
||||
|
||||
__m128i input0 = _mm_loadu_si128(input_mm);
|
||||
__m128i input1 = _mm_loadu_si128(input_mm + 1);
|
||||
__m128i input2 = _mm_loadu_si128(input_mm + 2);
|
||||
__m128i input3 = _mm_loadu_si128(input_mm + 3);
|
||||
|
||||
__m128i r0_0 = input0;
|
||||
__m128i r0_1 = input1;
|
||||
__m128i r0_2 = input2;
|
||||
__m128i r0_3 = input3;
|
||||
|
||||
__m128i r1_0 = input0;
|
||||
__m128i r1_1 = input1;
|
||||
__m128i r1_2 = input2;
|
||||
__m128i r1_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 1));
|
||||
|
||||
__m128i r2_0 = input0;
|
||||
__m128i r2_1 = input1;
|
||||
__m128i r2_2 = input2;
|
||||
__m128i r2_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 2));
|
||||
|
||||
__m128i r3_0 = input0;
|
||||
__m128i r3_1 = input1;
|
||||
__m128i r3_2 = input2;
|
||||
__m128i r3_3 = _mm_add_epi64(r0_3, _mm_set_epi32(0, 0, 0, 3));
|
||||
|
||||
for (int i = static_cast<int>(rounds); i > 0; i -= 2)
|
||||
{
|
||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||
|
||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||
|
||||
r0_3 = RotateLeft<16>(r0_3);
|
||||
r1_3 = RotateLeft<16>(r1_3);
|
||||
r2_3 = RotateLeft<16>(r2_3);
|
||||
r3_3 = RotateLeft<16>(r3_3);
|
||||
|
||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||
|
||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||
|
||||
r0_1 = RotateLeft<12>(r0_1);
|
||||
r1_1 = RotateLeft<12>(r1_1);
|
||||
r2_1 = RotateLeft<12>(r2_1);
|
||||
r3_1 = RotateLeft<12>(r3_1);
|
||||
|
||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||
|
||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||
|
||||
r0_3 = RotateLeft<8>(r0_3);
|
||||
r1_3 = RotateLeft<8>(r1_3);
|
||||
r2_3 = RotateLeft<8>(r2_3);
|
||||
r3_3 = RotateLeft<8>(r3_3);
|
||||
|
||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||
|
||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||
|
||||
r0_1 = RotateLeft<7>(r0_1);
|
||||
r1_1 = RotateLeft<7>(r1_1);
|
||||
r2_1 = RotateLeft<7>(r2_1);
|
||||
r3_1 = RotateLeft<7>(r3_1);
|
||||
|
||||
r0_1 = _mm_shuffle_epi32(r0_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
r0_2 = _mm_shuffle_epi32(r0_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r0_3 = _mm_shuffle_epi32(r0_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
|
||||
r1_1 = _mm_shuffle_epi32(r1_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
r1_2 = _mm_shuffle_epi32(r1_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r1_3 = _mm_shuffle_epi32(r1_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
|
||||
r2_1 = _mm_shuffle_epi32(r2_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
r2_2 = _mm_shuffle_epi32(r2_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r2_3 = _mm_shuffle_epi32(r2_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
|
||||
r3_1 = _mm_shuffle_epi32(r3_1, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
r3_2 = _mm_shuffle_epi32(r3_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
|
||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||
|
||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||
|
||||
r0_3 = RotateLeft<16>(r0_3);
|
||||
r1_3 = RotateLeft<16>(r1_3);
|
||||
r2_3 = RotateLeft<16>(r2_3);
|
||||
r3_3 = RotateLeft<16>(r3_3);
|
||||
|
||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||
|
||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||
|
||||
r0_1 = RotateLeft<12>(r0_1);
|
||||
r1_1 = RotateLeft<12>(r1_1);
|
||||
r2_1 = RotateLeft<12>(r2_1);
|
||||
r3_1 = RotateLeft<12>(r3_1);
|
||||
|
||||
r0_0 = _mm_add_epi32(r0_0, r0_1);
|
||||
r1_0 = _mm_add_epi32(r1_0, r1_1);
|
||||
r2_0 = _mm_add_epi32(r2_0, r2_1);
|
||||
r3_0 = _mm_add_epi32(r3_0, r3_1);
|
||||
|
||||
r0_3 = _mm_xor_si128(r0_3, r0_0);
|
||||
r1_3 = _mm_xor_si128(r1_3, r1_0);
|
||||
r2_3 = _mm_xor_si128(r2_3, r2_0);
|
||||
r3_3 = _mm_xor_si128(r3_3, r3_0);
|
||||
|
||||
r0_3 = RotateLeft<8>(r0_3);
|
||||
r1_3 = RotateLeft<8>(r1_3);
|
||||
r2_3 = RotateLeft<8>(r2_3);
|
||||
r3_3 = RotateLeft<8>(r3_3);
|
||||
|
||||
r0_2 = _mm_add_epi32(r0_2, r0_3);
|
||||
r1_2 = _mm_add_epi32(r1_2, r1_3);
|
||||
r2_2 = _mm_add_epi32(r2_2, r2_3);
|
||||
r3_2 = _mm_add_epi32(r3_2, r3_3);
|
||||
|
||||
r0_1 = _mm_xor_si128(r0_1, r0_2);
|
||||
r1_1 = _mm_xor_si128(r1_1, r1_2);
|
||||
r2_1 = _mm_xor_si128(r2_1, r2_2);
|
||||
r3_1 = _mm_xor_si128(r3_1, r3_2);
|
||||
|
||||
r0_1 = RotateLeft<7>(r0_1);
|
||||
r1_1 = RotateLeft<7>(r1_1);
|
||||
r2_1 = RotateLeft<7>(r2_1);
|
||||
r3_1 = RotateLeft<7>(r3_1);
|
||||
|
||||
r0_1 = _mm_shuffle_epi32(r0_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
r0_2 = _mm_shuffle_epi32(r0_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r0_3 = _mm_shuffle_epi32(r0_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
|
||||
r1_1 = _mm_shuffle_epi32(r1_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
r1_2 = _mm_shuffle_epi32(r1_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r1_3 = _mm_shuffle_epi32(r1_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
|
||||
r2_1 = _mm_shuffle_epi32(r2_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
r2_2 = _mm_shuffle_epi32(r2_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r2_3 = _mm_shuffle_epi32(r2_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
|
||||
r3_1 = _mm_shuffle_epi32(r3_1, _MM_SHUFFLE(2, 1, 0, 3));
|
||||
r3_2 = _mm_shuffle_epi32(r3_2, _MM_SHUFFLE(1, 0, 3, 2));
|
||||
r3_3 = _mm_shuffle_epi32(r3_3, _MM_SHUFFLE(0, 3, 2, 1));
|
||||
}
|
||||
|
||||
r0_0 = _mm_add_epi32(r0_0, input0);
|
||||
r0_1 = _mm_add_epi32(r0_1, input1);
|
||||
r0_2 = _mm_add_epi32(r0_2, input2);
|
||||
r0_3 = _mm_add_epi32(r0_3, input3);
|
||||
|
||||
r1_0 = _mm_add_epi32(r1_0, input0);
|
||||
r1_1 = _mm_add_epi32(r1_1, input1);
|
||||
r1_2 = _mm_add_epi32(r1_2, input2);
|
||||
r1_3 = _mm_add_epi32(r1_3, input3);
|
||||
r1_3 = _mm_add_epi64(r1_3, _mm_set_epi32(0, 0, 0, 1));
|
||||
|
||||
r2_0 = _mm_add_epi32(r2_0, input0);
|
||||
r2_1 = _mm_add_epi32(r2_1, input1);
|
||||
r2_2 = _mm_add_epi32(r2_2, input2);
|
||||
r2_3 = _mm_add_epi32(r2_3, input3);
|
||||
r2_3 = _mm_add_epi64(r2_3, _mm_set_epi32(0, 0, 0, 2));
|
||||
|
||||
r3_0 = _mm_add_epi32(r3_0, input0);
|
||||
r3_1 = _mm_add_epi32(r3_1, input1);
|
||||
r3_2 = _mm_add_epi32(r3_2, input2);
|
||||
r3_3 = _mm_add_epi32(r3_3, input3);
|
||||
r3_3 = _mm_add_epi64(r3_3, _mm_set_epi32(0, 0, 0, 3));
|
||||
|
||||
_mm_storeu_si128(output_mm + 0, r0_0);
|
||||
_mm_storeu_si128(output_mm + 1, r0_1);
|
||||
_mm_storeu_si128(output_mm + 2, r0_2);
|
||||
_mm_storeu_si128(output_mm + 3, r0_3);
|
||||
|
||||
_mm_storeu_si128(output_mm + 4, r1_0);
|
||||
_mm_storeu_si128(output_mm + 5, r1_1);
|
||||
_mm_storeu_si128(output_mm + 6, r1_2);
|
||||
_mm_storeu_si128(output_mm + 7, r1_3);
|
||||
|
||||
_mm_storeu_si128(output_mm + 8, r2_0);
|
||||
_mm_storeu_si128(output_mm + 9, r2_1);
|
||||
_mm_storeu_si128(output_mm + 10, r2_2);
|
||||
_mm_storeu_si128(output_mm + 11, r2_3);
|
||||
|
||||
_mm_storeu_si128(output_mm + 12, r3_0);
|
||||
_mm_storeu_si128(output_mm + 13, r3_1);
|
||||
_mm_storeu_si128(output_mm + 14, r3_2);
|
||||
_mm_storeu_si128(output_mm + 15, r3_3);
|
||||
}
|
||||
|
||||
#endif // CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
|
||||
NAMESPACE_END
|
||||
59
chacha.cpp
59
chacha.cpp
|
|
@ -11,6 +11,12 @@
|
|||
|
||||
NAMESPACE_BEGIN(CryptoPP)
|
||||
|
||||
#if defined(CRYPTOPP_SSE2_INTRIN_AVAILABLE)
|
||||
extern
|
||||
void ChaCha_OperateKeystream_SSE2(KeystreamOperation operation, byte *output,
|
||||
const word32 *state, size_t iterationCount, unsigned int rounds);
|
||||
#endif
|
||||
|
||||
#define CHACHA_QUARTER_ROUND(a,b,c,d) \
|
||||
a += b; d ^= a; d = rotlConstant<16,word32>(d); \
|
||||
c += d; b ^= c; b = rotlConstant<12,word32>(b); \
|
||||
|
|
@ -24,6 +30,15 @@ void ChaCha_TestInstantiations()
|
|||
}
|
||||
#endif
|
||||
|
||||
std::string ChaCha_Policy::AlgorithmProvider() const
|
||||
{
|
||||
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
if (HasSSE2())
|
||||
return "SSE2";
|
||||
#endif
|
||||
return "C++";
|
||||
}
|
||||
|
||||
void ChaCha_Policy::CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length)
|
||||
{
|
||||
CRYPTOPP_UNUSED(params);
|
||||
|
|
@ -58,19 +73,19 @@ void ChaCha_Policy::CipherResynchronize(byte *keystreamBuffer, const byte *IV, s
|
|||
|
||||
void ChaCha_Policy::SeekToIteration(lword iterationCount)
|
||||
{
|
||||
CRYPTOPP_UNUSED(iterationCount);
|
||||
throw NotImplemented(std::string(ChaCha_Info::StaticAlgorithmName()) + ": SeekToIteration is not yet implemented");
|
||||
|
||||
// TODO: these were Salsa20, and Wei re-arranged the state array for SSE2 operations.
|
||||
// If we can generate some out-of-band test vectors, then test and implement. Also
|
||||
// see the test vectors in salsa.txt and the use of Seek test argument.
|
||||
// m_state[8] = (word32)iterationCount;
|
||||
// m_state[5] = (word32)SafeRightShift<32>(iterationCount);
|
||||
|
||||
m_state[13] = (word32)iterationCount;
|
||||
m_state[12] = (word32)SafeRightShift<32>(iterationCount);
|
||||
}
|
||||
|
||||
unsigned int ChaCha_Policy::GetAlignment() const
|
||||
{
|
||||
#if CRYPTOPP_SSE2_ASM_AVAILABLE && 0
|
||||
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
if (HasSSE2())
|
||||
return 16;
|
||||
else
|
||||
|
|
@ -80,7 +95,7 @@ unsigned int ChaCha_Policy::GetAlignment() const
|
|||
|
||||
unsigned int ChaCha_Policy::GetOptimalBlockSize() const
|
||||
{
|
||||
#if CRYPTOPP_SSE2_ASM_AVAILABLE && 0
|
||||
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
if (HasSSE2())
|
||||
return 4*BYTES_PER_ITERATION;
|
||||
else
|
||||
|
|
@ -88,12 +103,34 @@ unsigned int ChaCha_Policy::GetOptimalBlockSize() const
|
|||
return BYTES_PER_ITERATION;
|
||||
}
|
||||
|
||||
void ChaCha_Policy::OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount)
|
||||
void ChaCha_Policy::OperateKeystream(KeystreamOperation operation,
|
||||
byte *output, const byte *input, size_t iterationCount)
|
||||
{
|
||||
word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
|
||||
#if CRYPTOPP_SSE2_INTRIN_AVAILABLE
|
||||
if (HasSSE2())
|
||||
{
|
||||
while (iterationCount >= 4)
|
||||
{
|
||||
ChaCha_OperateKeystream_SSE2(operation, output, m_state, iterationCount, m_rounds);
|
||||
|
||||
if ((operation & INPUT_NULL) != INPUT_NULL)
|
||||
xorbuf(output, input, 4*BYTES_PER_ITERATION);
|
||||
|
||||
m_state[12] += 4;
|
||||
if (m_state[12] < 4)
|
||||
m_state[13]++;
|
||||
|
||||
input += 4*BYTES_PER_ITERATION;
|
||||
output += 4*BYTES_PER_ITERATION;
|
||||
iterationCount -= 4;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
while (iterationCount--)
|
||||
{
|
||||
word32 x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15;
|
||||
|
||||
x0 = m_state[0]; x1 = m_state[1]; x2 = m_state[2]; x3 = m_state[3];
|
||||
x4 = m_state[4]; x5 = m_state[5]; x6 = m_state[6]; x7 = m_state[7];
|
||||
x8 = m_state[8]; x9 = m_state[9]; x10 = m_state[10]; x11 = m_state[11];
|
||||
|
|
@ -112,7 +149,7 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation, byte *output,
|
|||
CHACHA_QUARTER_ROUND(x3, x4, x9, x14);
|
||||
}
|
||||
|
||||
#undef CHACHA_OUTPUT
|
||||
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
||||
#define CHACHA_OUTPUT(x){\
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x1 + m_state[1]);\
|
||||
|
|
@ -131,12 +168,12 @@ void ChaCha_Policy::OperateKeystream(KeystreamOperation operation, byte *output,
|
|||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x14 + m_state[14]);\
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x15 + m_state[15]);}
|
||||
|
||||
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(CHACHA_OUTPUT, BYTES_PER_ITERATION);
|
||||
#undef CHACHA_OUTPUT
|
||||
#endif
|
||||
|
||||
++m_state[12];
|
||||
m_state[13] += static_cast<word32>(m_state[12] == 0);
|
||||
if (++m_state[12] == 0)
|
||||
m_state[13]++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
7
chacha.h
7
chacha.h
|
|
@ -35,10 +35,15 @@ protected:
|
|||
void CipherSetKey(const NameValuePairs ¶ms, const byte *key, size_t length);
|
||||
void OperateKeystream(KeystreamOperation operation, byte *output, const byte *input, size_t iterationCount);
|
||||
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length);
|
||||
bool CipherIsRandomAccess() const {return false;} // TODO
|
||||
bool CipherIsRandomAccess() const {return true;}
|
||||
void SeekToIteration(lword iterationCount);
|
||||
|
||||
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
||||
unsigned int GetAlignment() const;
|
||||
unsigned int GetOptimalBlockSize() const;
|
||||
#endif
|
||||
|
||||
std::string AlgorithmProvider() const;
|
||||
|
||||
FixedSizeAlignedSecBlock<word32, 16> m_state;
|
||||
int m_rounds;
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ LIB_SRCS = \
|
|||
algparam.cpp arc4.cpp aria-simd.cpp aria.cpp ariatab.cpp asn.cpp \
|
||||
authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2-simd.cpp \
|
||||
blake2.cpp blowfish.cpp blumshub.cpp camellia.cpp cast.cpp casts.cpp \
|
||||
cbcmac.cpp ccm.cpp chacha.cpp cham-simd.cpp cham.cpp channels.cpp \
|
||||
cbcmac.cpp ccm.cpp chacha-simd.cpp chacha.cpp cham-simd.cpp cham.cpp channels.cpp \
|
||||
cmac.cpp crc-simd.cpp crc.cpp default.cpp des.cpp dessp.cpp dh.cpp \
|
||||
dh2.cpp dll.cpp dsa.cpp eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp \
|
||||
emsa2.cpp eprecomp.cpp esign.cpp files.cpp filters.cpp fips140.cpp \
|
||||
|
|
@ -83,7 +83,7 @@ LIB_OBJS = \
|
|||
algparam.obj arc4.obj aria-simd.obj aria.obj ariatab.obj asn.obj \
|
||||
authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2-simd.obj \
|
||||
blake2.obj blowfish.obj blumshub.obj camellia.obj cast.obj casts.obj \
|
||||
cbcmac.obj ccm.obj chacha.obj cham-simd.obj cham.obj channels.obj \
|
||||
cbcmac.obj ccm.obj chacha-simd.obj chacha.obj cham-simd.obj cham.obj channels.obj \
|
||||
cmac.obj crc-simd.obj crc.obj default.obj des.obj dessp.obj dh.obj \
|
||||
dh2.obj dll.obj dsa.obj eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj \
|
||||
emsa2.obj eprecomp.obj esign.obj files.obj filters.obj fips140.obj \
|
||||
|
|
|
|||
|
|
@ -1,6 +1,9 @@
|
|||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<PropertyGroup>
|
||||
<LocalDebuggerCommandArguments>v</LocalDebuggerCommandArguments>
|
||||
<LocalDebuggerCommandArguments>tv chacha</LocalDebuggerCommandArguments>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<DebuggerFlavor>WindowsLocalDebugger</DebuggerFlavor>
|
||||
</PropertyGroup>
|
||||
</Project>
|
||||
|
|
@ -191,6 +191,7 @@
|
|||
<ClCompile Include="cbcmac.cpp" />
|
||||
<ClCompile Include="ccm.cpp" />
|
||||
<ClCompile Include="chacha.cpp" />
|
||||
<ClCompile Include="chacha-simd.cpp" />
|
||||
<ClCompile Include="cham.cpp" />
|
||||
<ClCompile Include="cham-simd.cpp" />
|
||||
<ClCompile Include="channels.cpp" />
|
||||
|
|
|
|||
|
|
@ -86,6 +86,9 @@
|
|||
<ClCompile Include="chacha.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="chacha-simd.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="cham.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
|||
|
|
@ -607,6 +607,7 @@ Salsa20_OperateKeystream ENDP
|
|||
QUARTER_ROUND(x3, x12, x9, x6)
|
||||
}
|
||||
|
||||
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
||||
#define SALSA_OUTPUT(x) {\
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 0, x0 + m_state[0]);\
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 1, x13 + m_state[13]);\
|
||||
|
|
@ -625,8 +626,8 @@ Salsa20_OperateKeystream ENDP
|
|||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 14, x6 + m_state[6]);\
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_WORD(x, LITTLE_ENDIAN_ORDER, 15, x3 + m_state[3]);}
|
||||
|
||||
#ifndef CRYPTOPP_DOXYGEN_PROCESSING
|
||||
CRYPTOPP_KEYSTREAM_OUTPUT_SWITCH(SALSA_OUTPUT, BYTES_PER_ITERATION);
|
||||
#undef SALSA_OUTPUT
|
||||
#endif
|
||||
|
||||
if (++m_state[8] == 0)
|
||||
|
|
|
|||
1
salsa.h
1
salsa.h
|
|
@ -41,6 +41,7 @@ protected:
|
|||
void CipherResynchronize(byte *keystreamBuffer, const byte *IV, size_t length);
|
||||
bool CipherIsRandomAccess() const {return true;}
|
||||
void SeekToIteration(lword iterationCount);
|
||||
|
||||
#if (CRYPTOPP_BOOL_X86 || CRYPTOPP_BOOL_X32 || CRYPTOPP_BOOL_X64)
|
||||
unsigned int GetAlignment() const;
|
||||
unsigned int GetOptimalBlockSize() const;
|
||||
|
|
|
|||
Loading…
Reference in New Issue