Add GNUmakefile-cross flags SIMON and SPECK
parent
a4c5bdf821
commit
2876371cea
|
|
@ -264,8 +264,10 @@ ifeq ($(IS_NEON),1)
|
||||||
BLAKE2_FLAG += -mfpu=neon
|
BLAKE2_FLAG += -mfpu=neon
|
||||||
LEA_FLAG += -mfpu=neon
|
LEA_FLAG += -mfpu=neon
|
||||||
SIMECK_FLAG += -mfpu=neon
|
SIMECK_FLAG += -mfpu=neon
|
||||||
SIMON_FLAG += -mfpu=neon
|
SIMON64_FLAG += -mfpu=neon
|
||||||
SPECK_FLAG += -mfpu=neon
|
SIMON128_FLAG += -mfpu=neon
|
||||||
|
SPECK64_FLAG += -mfpu=neon
|
||||||
|
SPECK128_FLAG += -mfpu=neon
|
||||||
ifeq ($(IS_ANDROID),1)
|
ifeq ($(IS_ANDROID),1)
|
||||||
ifeq ($(findstring -mfloat-abi=softfp,$(CXXFLAGS)),)
|
ifeq ($(findstring -mfloat-abi=softfp,$(CXXFLAGS)),)
|
||||||
NEON_FLAG += -mfloat-abi=softfp
|
NEON_FLAG += -mfloat-abi=softfp
|
||||||
|
|
@ -274,8 +276,10 @@ ifeq ($(IS_NEON),1)
|
||||||
BLAKE2_FLAG += -mfloat-abi=softfp
|
BLAKE2_FLAG += -mfloat-abi=softfp
|
||||||
LEA_FLAG += -mfloat-abi=softfp
|
LEA_FLAG += -mfloat-abi=softfp
|
||||||
SIMECK_FLAG += -mfloat-abi=softfp
|
SIMECK_FLAG += -mfloat-abi=softfp
|
||||||
SIMON_FLAG += -mfloat-abi=softfp
|
SIMON64_FLAG += -mfloat-abi=softfp
|
||||||
SPECK_FLAG += -mfloat-abi=softfp
|
SIMON128_FLAG += -mfloat-abi=softfp
|
||||||
|
SPECK64_FLAG += -mfloat-abi=softfp
|
||||||
|
SPECK128_FLAG += -mfloat-abi=softfp
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
endif
|
endif
|
||||||
|
|
@ -290,8 +294,10 @@ ifneq ($(IS_ARMv8),0)
|
||||||
LEA_FLAG = -march=armv8-a
|
LEA_FLAG = -march=armv8-a
|
||||||
NEON_FLAG = -march=armv8-a
|
NEON_FLAG = -march=armv8-a
|
||||||
SIMECK_FLAG = -march=armv8-a
|
SIMECK_FLAG = -march=armv8-a
|
||||||
SIMON_FLAG = -march=armv8-a
|
SIMON64_FLAG = -march=armv8-a
|
||||||
SPECK_FLAG = -march=armv8-a
|
SIMON128_FLAG = -march=armv8-a
|
||||||
|
SPECK64_FLAG = -march=armv8-a
|
||||||
|
SPECK128_FLAG = -march=armv8-a
|
||||||
endif
|
endif
|
||||||
HAVE_CRC := $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -march=armv8-a+crc -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __ARM_FEATURE_CRC32)
|
HAVE_CRC := $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -march=armv8-a+crc -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __ARM_FEATURE_CRC32)
|
||||||
ifeq ($(HAVE_CRC),1)
|
ifeq ($(HAVE_CRC),1)
|
||||||
|
|
@ -318,13 +324,13 @@ ifneq ($(IS_i686)$(IS_x86_64),00)
|
||||||
LEA_FLAG = -mssse3
|
LEA_FLAG = -mssse3
|
||||||
SSSE3_FLAG = -mssse3
|
SSSE3_FLAG = -mssse3
|
||||||
SIMECK_FLAG = -mssse3
|
SIMECK_FLAG = -mssse3
|
||||||
SIMON_FLAG = -mssse3
|
SIMON128_FLAG = -mssse3
|
||||||
SPECK_FLAG = -mssse3
|
SPECK128_FLAG = -mssse3
|
||||||
endif
|
endif
|
||||||
HAVE_SSE4 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -msse4.1 -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __SSE4_1__)
|
HAVE_SSE4 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -msse4.1 -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __SSE4_1__)
|
||||||
ifeq ($(HAVE_SSE4),1)
|
ifeq ($(HAVE_SSE4),1)
|
||||||
SIMON_FLAG = -msse4.1
|
SIMON64_FLAG = -msse4.1
|
||||||
SPECK_FLAG = -msse4.1
|
SPECK64_FLAG = -msse4.1
|
||||||
endif
|
endif
|
||||||
HAVE_SSE4 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -msse4.2 -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __SSE4_2__)
|
HAVE_SSE4 = $(shell $(CXX) $(CXXFLAGS) -DADHOC_MAIN -msse4.2 -dM -E adhoc.cpp 2>&1 | $(EGREP) -i -c __SSE4_2__)
|
||||||
ifeq ($(HAVE_SSE4),1)
|
ifeq ($(HAVE_SSE4),1)
|
||||||
|
|
@ -604,13 +610,21 @@ shacal2-simd.o : shacal2-simd.cpp
|
||||||
simeck-simd.o : simeck-simd.cpp
|
simeck-simd.o : simeck-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(SIMECK_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SIMECK_FLAG) -c) $<
|
||||||
|
|
||||||
# SSSE3 or NEON available
|
# SSE4.1, NEON or POWER7 available
|
||||||
simon-simd.o : simon-simd.cpp
|
simon64-simd.o : simon64-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(SIMON_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SIMON64_FLAG) -c) $<
|
||||||
|
|
||||||
# SSSE3 or NEON available
|
# SSSE3, NEON or POWER8 available
|
||||||
speck-simd.o : speck-simd.cpp
|
simon128-simd.o : simon128-simd.cpp
|
||||||
$(CXX) $(strip $(CXXFLAGS) $(SPECK_FLAG) -c) $<
|
$(CXX) $(strip $(CXXFLAGS) $(SIMON128_FLAG) -c) $<
|
||||||
|
|
||||||
|
# SSE4.1, NEON or POWER7 available
|
||||||
|
speck64-simd.o : speck64-simd.cpp
|
||||||
|
$(CXX) $(strip $(CXXFLAGS) $(SPECK64_FLAG) -c) $<
|
||||||
|
|
||||||
|
# SSSE3, NEON or POWER8 available
|
||||||
|
speck128-simd.o : speck128-simd.cpp
|
||||||
|
$(CXX) $(strip $(CXXFLAGS) $(SPECK128_FLAG) -c) $<
|
||||||
|
|
||||||
# AESNI available
|
# AESNI available
|
||||||
sm4-simd.o : sm4-simd.cpp
|
sm4-simd.o : sm4-simd.cpp
|
||||||
|
|
|
||||||
|
|
@ -342,6 +342,7 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& stat
|
||||||
row2 = ff1 = LOADU( &state.h[4] );
|
row2 = ff1 = LOADU( &state.h[4] );
|
||||||
row3 = LOADU( &BLAKE2S_IV[0] );
|
row3 = LOADU( &BLAKE2S_IV[0] );
|
||||||
row4 = _mm_xor_si128( LOADU( &BLAKE2S_IV[4] ), LOADU( &state.t[0] ) );
|
row4 = _mm_xor_si128( LOADU( &BLAKE2S_IV[4] ), LOADU( &state.t[0] ) );
|
||||||
|
|
||||||
BLAKE2S_ROUND( 0 );
|
BLAKE2S_ROUND( 0 );
|
||||||
BLAKE2S_ROUND( 1 );
|
BLAKE2S_ROUND( 1 );
|
||||||
BLAKE2S_ROUND( 2 );
|
BLAKE2S_ROUND( 2 );
|
||||||
|
|
@ -352,6 +353,7 @@ void BLAKE2_Compress32_SSE4(const byte* input, BLAKE2_State<word32, false>& stat
|
||||||
BLAKE2S_ROUND( 7 );
|
BLAKE2S_ROUND( 7 );
|
||||||
BLAKE2S_ROUND( 8 );
|
BLAKE2S_ROUND( 8 );
|
||||||
BLAKE2S_ROUND( 9 );
|
BLAKE2S_ROUND( 9 );
|
||||||
|
|
||||||
STOREU( &state.h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
|
STOREU( &state.h[0], _mm_xor_si128( ff0, _mm_xor_si128( row1, row3 ) ) );
|
||||||
STOREU( &state.h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
|
STOREU( &state.h[4], _mm_xor_si128( ff1, _mm_xor_si128( row2, row4 ) ) );
|
||||||
}
|
}
|
||||||
|
|
@ -752,6 +754,7 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state
|
||||||
row3h = LOADU( &BLAKE2B_IV[2] );
|
row3h = LOADU( &BLAKE2B_IV[2] );
|
||||||
row4l = _mm_xor_si128( LOADU( &BLAKE2B_IV[4] ), LOADU( &state.t[0] ) );
|
row4l = _mm_xor_si128( LOADU( &BLAKE2B_IV[4] ), LOADU( &state.t[0] ) );
|
||||||
row4h = _mm_xor_si128( LOADU( &BLAKE2B_IV[6] ), LOADU( &state.f[0] ) );
|
row4h = _mm_xor_si128( LOADU( &BLAKE2B_IV[6] ), LOADU( &state.f[0] ) );
|
||||||
|
|
||||||
BLAKE2B_ROUND( 0 );
|
BLAKE2B_ROUND( 0 );
|
||||||
BLAKE2B_ROUND( 1 );
|
BLAKE2B_ROUND( 1 );
|
||||||
BLAKE2B_ROUND( 2 );
|
BLAKE2B_ROUND( 2 );
|
||||||
|
|
@ -764,6 +767,7 @@ void BLAKE2_Compress64_SSE4(const byte* input, BLAKE2_State<word64, true>& state
|
||||||
BLAKE2B_ROUND( 9 );
|
BLAKE2B_ROUND( 9 );
|
||||||
BLAKE2B_ROUND( 10 );
|
BLAKE2B_ROUND( 10 );
|
||||||
BLAKE2B_ROUND( 11 );
|
BLAKE2B_ROUND( 11 );
|
||||||
|
|
||||||
row1l = _mm_xor_si128( row3l, row1l );
|
row1l = _mm_xor_si128( row3l, row1l );
|
||||||
row1h = _mm_xor_si128( row3h, row1h );
|
row1h = _mm_xor_si128( row3h, row1h );
|
||||||
STOREU( &state.h[0], _mm_xor_si128( LOADU( &state.h[0] ), row1l ) );
|
STOREU( &state.h[0], _mm_xor_si128( LOADU( &state.h[0] ), row1l ) );
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue