From f2bc3cd0ca62b6c52f8eb3954dbf9ead657a3ef0 Mon Sep 17 00:00:00 2001 From: Jeffrey Walton Date: Wed, 22 Nov 2017 08:45:38 -0500 Subject: [PATCH] Add speck-simd.cpp to project files (GH #538, #539) Cleaned up whitespace --- Filelist.txt | 1 + cryptest.nmake | 4 ++-- cryptlib.vcxproj | 1 + cryptlib.vcxproj.filters | 3 +++ speck-simd.cpp | 28 +++++++++------------------- 5 files changed, 16 insertions(+), 21 deletions(-) diff --git a/Filelist.txt b/Filelist.txt index 0108acb7..95fca88e 100644 --- a/Filelist.txt +++ b/Filelist.txt @@ -283,6 +283,7 @@ socketft.h sosemanuk.cpp sosemanuk.h speck.cpp +speck-simd.cpp speck.h square.cpp square.h diff --git a/cryptest.nmake b/cryptest.nmake index 3d77296c..411a3ffa 100644 --- a/cryptest.nmake +++ b/cryptest.nmake @@ -47,9 +47,9 @@ # If you use 'make sources' from Linux makefile, then add 'winpipes.cpp' to the list below. -LIB_SRCS = cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp algparam.cpp arc4.cpp aria-simd.cpp aria.cpp ariatab.cpp asn.cpp authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2-simd.cpp blake2.cpp blowfish.cpp blumshub.cpp camellia.cpp cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp channels.cpp cmac.cpp crc-simd.cpp crc.cpp default.cpp des.cpp dessp.cpp dh.cpp dh2.cpp dll.cpp dsa.cpp eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp eprecomp.cpp esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp gcm-simd.cpp gcm.cpp gf256.cpp gf2_32.cpp gf2n.cpp gfpcrypt.cpp gost.cpp gzip.cpp hex.cpp hmac.cpp hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp kalynatab.cpp keccak.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp md5.cpp misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp neon-simd.cpp network.cpp oaep.cpp osrng.cpp padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp pssr.cpp pubkey.cpp queue.cpp rabin.cpp randpool.cpp rc2.cpp rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp rijndael-simd.cpp rijndael.cpp ripemd.cpp rng.cpp rsa.cpp rw.cpp safer.cpp salsa.cpp 
seal.cpp seed.cpp serpent.cpp sha-simd.cpp sha.cpp sha3.cpp shacal2-simd.cpp shacal2.cpp shark.cpp sharkbox.cpp simon.cpp skipjack.cpp socketft.cpp sosemanuk.cpp speck.cpp square.cpp squaretb.cpp sse-simd.cpp strciphr.cpp tea.cpp tftables.cpp threefish.cpp tiger.cpp tigertab.cpp trdlocal.cpp ttmac.cpp twofish.cpp vmac.cpp wait.cpp wake.cpp whrlpool.cpp winpipes.cpp xtr.cpp xtrcrypt.cpp zdeflate.cpp zinflate.cpp zlib.cpp +LIB_SRCS = cryptlib.cpp cpu.cpp integer.cpp 3way.cpp adler32.cpp algebra.cpp algparam.cpp arc4.cpp aria-simd.cpp aria.cpp ariatab.cpp asn.cpp authenc.cpp base32.cpp base64.cpp basecode.cpp bfinit.cpp blake2-simd.cpp blake2.cpp blowfish.cpp blumshub.cpp camellia.cpp cast.cpp casts.cpp cbcmac.cpp ccm.cpp chacha.cpp channels.cpp cmac.cpp crc-simd.cpp crc.cpp default.cpp des.cpp dessp.cpp dh.cpp dh2.cpp dll.cpp dsa.cpp eax.cpp ec2n.cpp eccrypto.cpp ecp.cpp elgamal.cpp emsa2.cpp eprecomp.cpp esign.cpp files.cpp filters.cpp fips140.cpp fipstest.cpp gcm-simd.cpp gcm.cpp gf256.cpp gf2_32.cpp gf2n.cpp gfpcrypt.cpp gost.cpp gzip.cpp hex.cpp hmac.cpp hrtimer.cpp ida.cpp idea.cpp iterhash.cpp kalyna.cpp kalynatab.cpp keccak.cpp luc.cpp mars.cpp marss.cpp md2.cpp md4.cpp md5.cpp misc.cpp modes.cpp mqueue.cpp mqv.cpp nbtheory.cpp neon-simd.cpp network.cpp oaep.cpp osrng.cpp padlkrng.cpp panama.cpp pkcspad.cpp poly1305.cpp polynomi.cpp pssr.cpp pubkey.cpp queue.cpp rabin.cpp randpool.cpp rc2.cpp rc5.cpp rc6.cpp rdrand.cpp rdtables.cpp rijndael-simd.cpp rijndael.cpp ripemd.cpp rng.cpp rsa.cpp rw.cpp safer.cpp salsa.cpp seal.cpp seed.cpp serpent.cpp sha-simd.cpp sha.cpp sha3.cpp shacal2-simd.cpp shacal2.cpp shark.cpp sharkbox.cpp simon.cpp skipjack.cpp socketft.cpp sosemanuk.cpp speck.cpp speck-simd.cpp square.cpp squaretb.cpp sse-simd.cpp strciphr.cpp tea.cpp tftables.cpp threefish.cpp tiger.cpp tigertab.cpp trdlocal.cpp ttmac.cpp twofish.cpp vmac.cpp wait.cpp wake.cpp whrlpool.cpp winpipes.cpp xtr.cpp xtrcrypt.cpp zdeflate.cpp zinflate.cpp zlib.cpp -LIB_OBJS = 
cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj algparam.obj arc4.obj aria-simd.obj aria.obj ariatab.obj asn.obj authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2-simd.obj blake2.obj blowfish.obj blumshub.obj camellia.obj cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj channels.obj cmac.obj crc-simd.obj crc.obj default.obj des.obj dessp.obj dh.obj dh2.obj dll.obj dsa.obj eax.obj ec2n.obj eccrypto.obj ecp.obj elgamal.obj emsa2.obj eprecomp.obj esign.obj files.obj filters.obj fips140.obj fipstest.obj gcm-simd.obj gcm.obj gf256.obj gf2_32.obj gf2n.obj gfpcrypt.obj gost.obj gzip.obj hex.obj hmac.obj hrtimer.obj ida.obj idea.obj iterhash.obj kalyna.obj kalynatab.obj keccak.obj luc.obj mars.obj marss.obj md2.obj md4.obj md5.obj misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj neon-simd.obj network.obj oaep.obj osrng.obj padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj pssr.obj pubkey.obj queue.obj rabin.obj randpool.obj rc2.obj rc5.obj rc6.obj rdrand.obj rdtables.obj rijndael-simd.obj rijndael.obj ripemd.obj rng.obj rsa.obj rw.obj safer.obj salsa.obj seal.obj seed.obj serpent.obj sha-simd.obj sha.obj sha3.obj shacal2-simd.obj shacal2.obj shark.obj sharkbox.obj simon.obj skipjack.obj socketft.obj sosemanuk.obj speck.obj square.obj squaretb.obj sse-simd.obj strciphr.obj tea.obj tftables.obj threefish.obj tiger.obj tigertab.obj trdlocal.obj ttmac.obj twofish.obj vmac.obj wait.obj wake.obj whrlpool.obj winpipes.obj xtr.obj xtrcrypt.obj zdeflate.obj zinflate.obj zlib.obj +LIB_OBJS = cryptlib.obj cpu.obj integer.obj 3way.obj adler32.obj algebra.obj algparam.obj arc4.obj aria-simd.obj aria.obj ariatab.obj asn.obj authenc.obj base32.obj base64.obj basecode.obj bfinit.obj blake2-simd.obj blake2.obj blowfish.obj blumshub.obj camellia.obj cast.obj casts.obj cbcmac.obj ccm.obj chacha.obj channels.obj cmac.obj crc-simd.obj crc.obj default.obj des.obj dessp.obj dh.obj dh2.obj dll.obj dsa.obj eax.obj ec2n.obj eccrypto.obj ecp.obj 
elgamal.obj emsa2.obj eprecomp.obj esign.obj files.obj filters.obj fips140.obj fipstest.obj gcm-simd.obj gcm.obj gf256.obj gf2_32.obj gf2n.obj gfpcrypt.obj gost.obj gzip.obj hex.obj hmac.obj hrtimer.obj ida.obj idea.obj iterhash.obj kalyna.obj kalynatab.obj keccak.obj luc.obj mars.obj marss.obj md2.obj md4.obj md5.obj misc.obj modes.obj mqueue.obj mqv.obj nbtheory.obj neon-simd.obj network.obj oaep.obj osrng.obj padlkrng.obj panama.obj pkcspad.obj poly1305.obj polynomi.obj pssr.obj pubkey.obj queue.obj rabin.obj randpool.obj rc2.obj rc5.obj rc6.obj rdrand.obj rdtables.obj rijndael-simd.obj rijndael.obj ripemd.obj rng.obj rsa.obj rw.obj safer.obj salsa.obj seal.obj seed.obj serpent.obj sha-simd.obj sha.obj sha3.obj shacal2-simd.obj shacal2.obj shark.obj sharkbox.obj simon.obj skipjack.obj socketft.obj sosemanuk.obj speck.obj speck-simd.obj square.obj squaretb.obj sse-simd.obj strciphr.obj tea.obj tftables.obj threefish.obj tiger.obj tigertab.obj trdlocal.obj ttmac.obj twofish.obj vmac.obj wait.obj wake.obj whrlpool.obj winpipes.obj xtr.obj xtrcrypt.obj zdeflate.obj zinflate.obj zlib.obj TEST_SRCS = bench1.cpp bench2.cpp test.cpp validat0.cpp validat1.cpp validat2.cpp validat3.cpp datatest.cpp regtest1.cpp regtest2.cpp regtest3.cpp fipsalgt.cpp dlltest.cpp fipstest.cpp diff --git a/cryptlib.vcxproj b/cryptlib.vcxproj index 03257d9e..0c87aaa7 100644 --- a/cryptlib.vcxproj +++ b/cryptlib.vcxproj @@ -294,6 +294,7 @@ + diff --git a/cryptlib.vcxproj.filters b/cryptlib.vcxproj.filters index 96afc6e3..7db0d3de 100644 --- a/cryptlib.vcxproj.filters +++ b/cryptlib.vcxproj.filters @@ -377,6 +377,9 @@ Source Files + + Source Files + Source Files diff --git a/speck-simd.cpp b/speck-simd.cpp index a78c3faa..43d081f9 100644 --- a/speck-simd.cpp +++ b/speck-simd.cpp @@ -1,8 +1,8 @@ // speck-simd.cpp - written and placed in the public domain by Jeffrey Walton // // This source file uses intrinsics and built-ins to gain access to -// AES-NI, ARMv8a AES and Power8 AES instructions. 
A separate source -// file is needed because additional CXXFLAGS are required to enable +// SSSE3, ARM NEON and ARMv8a, and Power7 Altivec instructions. A separate +// source file is needed because additional CXXFLAGS are required to enable // the appropriate instructions sets in some build configurations. #include "pch.h" @@ -64,7 +64,7 @@ inline void SPECK128_Enc_Block(__m128i &block0, const word64 *subkeys, unsigned { // Hack ahead... SPECK128_AdvancedProcessBlocks_SSSE3 loads each SPECK-128 block into a // __m128i. We can't SSE over them, so we rearrange the data to allow packed operations. - // Its also easier to permute them in SPECK128_Enc_4_Blocks rather than the calling code. + // It's also easier to permute them in SPECK128_Enc_Block rather than the calling code. // SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy. __m128i block1 = _mm_setzero_si128(); __m128i x1 = _mm_unpacklo_epi64(block0, block1); @@ -101,8 +101,8 @@ inline void SPECK128_Enc_4_Blocks(__m128i &block0, __m128i &block1, // SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy. __m128i x1 = _mm_unpacklo_epi64(block0, block1); __m128i y1 = _mm_unpackhi_epi64(block0, block1); - __m128i x2 = _mm_unpacklo_epi64(block2, block3); // x2 - __m128i y2 = _mm_unpackhi_epi64(block2, block3); // y2 + __m128i x2 = _mm_unpacklo_epi64(block2, block3); + __m128i y2 = _mm_unpackhi_epi64(block2, block3); const __m128i mask = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7); x1 = _mm_shuffle_epi8(x1, mask); @@ -141,7 +141,7 @@ inline void SPECK128_Dec_Block(__m128i &block0, const word64 *subkeys, unsigned { // Hack ahead... SPECK128_AdvancedProcessBlocks_SSSE3 loads each SPECK-128 block into a // __m128i. We can't SSE over them, so we rearrange the data to allow packed operations. - // Its also easier to permute them in SPECK128_Enc_4_Blocks rather than the calling code. + // It's also easier to permute them in SPECK128_Dec_Block rather than the calling code. 
// SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy. __m128i block1 = _mm_setzero_si128(); __m128i x1 = _mm_unpacklo_epi64(block0, block1); @@ -155,15 +155,10 @@ inline void SPECK128_Dec_Block(__m128i &block0, const word64 *subkeys, unsigned { const __m128i k1 = _mm_castpd_si128(_mm_loaddup_pd((const double*)(subkeys+i))); - // y ^= x; y1 = _mm_xor_si128(y1, x1); - // y = rotrFixed(y,3); y1 = RotateRight64<3>(y1); - // x ^= k; x1 = _mm_xor_si128(x1, k1); - // x -= y; x1 = _mm_sub_epi64(x1, y1); - // x = rotlFixed(x,8); x1 = RotateLeft64<8>(x1); } @@ -179,12 +174,12 @@ inline void SPECK128_Dec_4_Blocks(__m128i &block0, __m128i &block1, { // Hack ahead... SPECK128_AdvancedProcessBlocks_SSSE3 loads each SPECK-128 block into a // __m128i. We can't SSE over them, so we rearrange the data to allow packed operations. - // Its also easier to permute them in SPECK128_Enc_4_Blocks rather than the calling code. + // It's also easier to permute them in SPECK128_Dec_4_Blocks rather than the calling code. // SPECK128_AdvancedProcessBlocks_SSSE3 is rather messy. 
__m128i x1 = _mm_unpacklo_epi64(block0, block1); __m128i y1 = _mm_unpackhi_epi64(block0, block1); - __m128i x2 = _mm_unpacklo_epi64(block2, block3); // x2 - __m128i y2 = _mm_unpackhi_epi64(block2, block3); // y2 + __m128i x2 = _mm_unpacklo_epi64(block2, block3); + __m128i y2 = _mm_unpackhi_epi64(block2, block3); const __m128i mask = _mm_set_epi8(8,9,10,11, 12,13,14,15, 0,1,2,3, 4,5,6,7); x1 = _mm_shuffle_epi8(x1, mask); @@ -196,19 +191,14 @@ inline void SPECK128_Dec_4_Blocks(__m128i &block0, __m128i &block1, { const __m128i k1 = _mm_castpd_si128(_mm_loaddup_pd((const double*)(subkeys+i))); - // y ^= x; y1 = _mm_xor_si128(y1, x1); y2 = _mm_xor_si128(y2, x2); - // y = rotrFixed(y,3); y1 = RotateRight64<3>(y1); y2 = RotateRight64<3>(y2); - // x ^= k; x1 = _mm_xor_si128(x1, k1); x2 = _mm_xor_si128(x2, k1); - // x -= y; x1 = _mm_sub_epi64(x1, y1); x2 = _mm_sub_epi64(x2, y2); - // x = rotlFixed(x,8); x1 = RotateLeft64<8>(x1); x2 = RotateLeft64<8>(x2); }