|
@ -20,22 +20,24 @@ |
|
|
#include "cc20.h" |
|
|
#include "cc20.h" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------
|
|
|
#if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* get any erorr message out of openssl
|
|
|
// get any erorr message out of openssl
|
|
|
taken from https://en.wikibooks.org/wiki/OpenSSL/Error_handling */
|
|
|
// taken from https://en.wikibooks.org/wiki/OpenSSL/Error_handling
|
|
|
static char *openssl_err_as_string (void) { |
|
|
static char *openssl_err_as_string (void) { |
|
|
BIO *bio = BIO_new (BIO_s_mem ()); |
|
|
|
|
|
ERR_print_errors (bio); |
|
|
BIO *bio = BIO_new(BIO_s_mem()); |
|
|
|
|
|
ERR_print_errors(bio); |
|
|
char *buf = NULL; |
|
|
char *buf = NULL; |
|
|
size_t len = BIO_get_mem_data (bio, &buf); |
|
|
size_t len = BIO_get_mem_data(bio, &buf); |
|
|
char *ret = (char *) calloc (1, 1 + len); |
|
|
char *ret = (char *)calloc(1, 1 + len); |
|
|
|
|
|
|
|
|
if(ret) |
|
|
if(ret) |
|
|
memcpy (ret, buf, len); |
|
|
memcpy(ret, buf, len); |
|
|
|
|
|
|
|
|
|
|
|
BIO_free(bio); |
|
|
|
|
|
|
|
|
BIO_free (bio); |
|
|
|
|
|
return ret; |
|
|
return ret; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
@ -75,7 +77,7 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#elif defined (__SSE2__) // SSE ----------------------------------------------------------
|
|
|
#elif defined (__SSE2__) // SSE2 ---------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// taken (and heavily modified and enhanced) from
|
|
|
// taken (and heavily modified and enhanced) from
|
|
@ -93,15 +95,20 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
#define TWO _mm_setr_epi32(2, 0, 0, 0) |
|
|
#define TWO _mm_setr_epi32(2, 0, 0, 0) |
|
|
|
|
|
|
|
|
#if defined (__SSSE3__) // --- SSSE3

// byte shuffle masks: applied per 32-bit lane, they move the bytes in a way
// that equals a left rotation by 8 and by 16 bits respectively
#define L8  _mm_set_epi32(0x0e0d0c0fL, 0x0a09080bL, 0x06050407L, 0x02010003L)
#define L16 _mm_set_epi32(0x0d0c0f0eL, 0x09080b0aL, 0x05040706L, 0x01000302L)

// whole-byte rotations get done by a single shuffle each
#define ROL8(X)  ( _mm_shuffle_epi8(X, L8))  /* SSSE 3 */
#define ROL16(X) ( _mm_shuffle_epi8(X, L16)) /* SSSE 3 */

#else // --- regular SSE2 ----------

// without the byte shuffle, fall back to the generic ROL macro
#define ROL8(X)  ROL(X,8)
#define ROL16(X) ROL(X,16)

#endif // --------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define CC20_PERMUTE_ROWS(A,B,C,D) \ |
|
|
#define CC20_PERMUTE_ROWS(A,B,C,D) \ |
|
|
B = _mm_shuffle_epi32(B, _MM_SHUFFLE(0, 3, 2, 1)); \ |
|
|
B = _mm_shuffle_epi32(B, _MM_SHUFFLE(0, 3, 2, 1)); \ |
|
|
C = _mm_shuffle_epi32(C, _MM_SHUFFLE(1, 0, 3, 2)); \ |
|
|
C = _mm_shuffle_epi32(C, _MM_SHUFFLE(1, 0, 3, 2)); \ |
|
@ -129,10 +136,11 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
CC20_EVEN_ROUND(A, B, C, D) |
|
|
CC20_EVEN_ROUND(A, B, C, D) |
|
|
|
|
|
|
|
|
// XOR 16 bytes read from I with the vector X, store the result at O, and
// advance both pointers by 16 bytes (I and O get modified, so they must be
// lvalues); loads and stores are the unaligned variants
// wrapped in do { } while(0) so the macro acts as one single statement, even
// when used as the body of an unbraced if or loop
#define STOREXOR(O,I,X)                                               \
    do {                                                              \
        _mm_storeu_si128((__m128i*)(O),                               \
                         _mm_xor_si128(_mm_loadu_si128((__m128i*)(I)), (X))); \
        (I) += 16; (O) += 16;                                         \
    } while(0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
const unsigned char *iv, cc20_context_t *ctx) { |
|
|
const unsigned char *iv, cc20_context_t *ctx) { |
|
|
|
|
|
|
|
@ -142,17 +150,16 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
|
|
|
|
|
const uint8_t *magic_constant = (uint8_t*)"expand 32-byte k"; |
|
|
const uint8_t *magic_constant = (uint8_t*)"expand 32-byte k"; |
|
|
|
|
|
|
|
|
a = _mm_loadu_si128 ((__m128i*)magic_constant); |
|
|
a = _mm_loadu_si128((__m128i*)magic_constant); |
|
|
b = _mm_loadu_si128 ((__m128i*)(ctx->key)); |
|
|
b = _mm_loadu_si128((__m128i*)(ctx->key)); |
|
|
c = _mm_loadu_si128 ( (__m128i*)((ctx->key)+16)); |
|
|
c = _mm_loadu_si128( (__m128i*)((ctx->key)+16)); |
|
|
d = _mm_loadu_si128 ((__m128i*)iv); |
|
|
d = _mm_loadu_si128((__m128i*)iv); |
|
|
|
|
|
|
|
|
while (in_len >= 128) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
while(in_len >= 128) { |
|
|
k0 = a; k1 = b; k2 = c; k3 = d; |
|
|
k0 = a; k1 = b; k2 = c; k3 = d; |
|
|
k4 = a; k5 = b; k6 = c; k7 = ADD(d, ONE); |
|
|
k4 = a; k5 = b; k6 = c; k7 = ADD(d, ONE); |
|
|
|
|
|
|
|
|
// 10 double rounds -- in parallel to make better use of all 8 SSE registers
|
|
|
// 10 double rounds -- two in parallel to make better use of all 8 SSE registers
|
|
|
CC20_DOUBLE_ROUND(k0, k1, k2, k3); CC20_DOUBLE_ROUND(k4, k5, k6, k7); |
|
|
CC20_DOUBLE_ROUND(k0, k1, k2, k3); CC20_DOUBLE_ROUND(k4, k5, k6, k7); |
|
|
CC20_DOUBLE_ROUND(k0, k1, k2, k3); CC20_DOUBLE_ROUND(k4, k5, k6, k7); |
|
|
CC20_DOUBLE_ROUND(k0, k1, k2, k3); CC20_DOUBLE_ROUND(k4, k5, k6, k7); |
|
|
CC20_DOUBLE_ROUND(k0, k1, k2, k3); CC20_DOUBLE_ROUND(k4, k5, k6, k7); |
|
|
CC20_DOUBLE_ROUND(k0, k1, k2, k3); CC20_DOUBLE_ROUND(k4, k5, k6, k7); |
|
@ -176,8 +183,7 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
in_len -= 128; |
|
|
in_len -= 128; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if (in_len >= 64) { |
|
|
if(in_len >= 64) { |
|
|
|
|
|
|
|
|
k0 = a; k1 = b; k2 = c; k3 = d; |
|
|
k0 = a; k1 = b; k2 = c; k3 = d; |
|
|
|
|
|
|
|
|
// 10 double rounds
|
|
|
// 10 double rounds
|
|
@ -202,8 +208,7 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
in_len -= 64; |
|
|
in_len -= 64; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if (in_len) { |
|
|
if(in_len) { |
|
|
|
|
|
|
|
|
k0 = a; k1 = b; k2 = c; k3 = d; |
|
|
k0 = a; k1 = b; k2 = c; k3 = d; |
|
|
|
|
|
|
|
|
// 10 double rounds
|
|
|
// 10 double rounds
|
|
@ -220,10 +225,10 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
|
|
|
|
|
k0 = ADD(k0, a); k1 = ADD(k1, b); k2 = ADD(k2, c); k3 = ADD(k3, d); |
|
|
k0 = ADD(k0, a); k1 = ADD(k1, b); k2 = ADD(k2, c); k3 = ADD(k3, d); |
|
|
|
|
|
|
|
|
_mm_storeu_si128 ((__m128i*)&(ctx->keystream32[ 0]), k0); |
|
|
_mm_storeu_si128((__m128i*)&(ctx->keystream32[ 0]), k0); |
|
|
_mm_storeu_si128 ((__m128i*)&(ctx->keystream32[ 4]), k1); |
|
|
_mm_storeu_si128((__m128i*)&(ctx->keystream32[ 4]), k1); |
|
|
_mm_storeu_si128 ((__m128i*)&(ctx->keystream32[ 8]), k2); |
|
|
_mm_storeu_si128((__m128i*)&(ctx->keystream32[ 8]), k2); |
|
|
_mm_storeu_si128 ((__m128i*)&(ctx->keystream32[12]), k3); |
|
|
_mm_storeu_si128((__m128i*)&(ctx->keystream32[12]), k3); |
|
|
|
|
|
|
|
|
// keep in mind that out and in got increased inside the last loop
|
|
|
// keep in mind that out and in got increased inside the last loop
|
|
|
// and point to current position now
|
|
|
// and point to current position now
|
|
@ -231,14 +236,13 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
in_len--; |
|
|
in_len--; |
|
|
out[in_len] = in[in_len] ^ keystream8[in_len]; |
|
|
out[in_len] = in[in_len] ^ keystream8[in_len]; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
return(0); |
|
|
return(0); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#else // plain C --------------------------------------------------------------------------
|
|
|
#else // plain C --------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// taken (and modified) from https://github.com/Ginurx/chacha20-c (public domain)
|
|
|
// taken (and modified) from https://github.com/Ginurx/chacha20-c (public domain)
|
|
@ -255,11 +259,13 @@ static void cc20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// rotate the 32-bit value x left by r bits, r needs to be in the range 1..31
// (r == 0 would make the right shift a shift by the full width)
// the cast keeps the arithmetic at exactly 32 bit regardless of x's own type
#define ROL32(x,r) ((((uint32_t)(x))<<(r))|(((uint32_t)(x))>>(32-(r))))

// ChaCha quarter round on the four words x[a], x[b], x[c], x[d] of the
// uint32_t array x, using the rotation amounts 16, 12, 8 and 7
// wrapped in do { } while(0) so the macro acts as one single statement
#define CC20_QUARTERROUND(x, a, b, c, d)                            \
    do {                                                            \
        (x)[a] += (x)[b]; (x)[d] = ROL32((x)[d] ^ (x)[a], 16);      \
        (x)[c] += (x)[d]; (x)[b] = ROL32((x)[b] ^ (x)[c], 12);      \
        (x)[a] += (x)[b]; (x)[d] = ROL32((x)[d] ^ (x)[a],  8);      \
        (x)[c] += (x)[d]; (x)[b] = ROL32((x)[b] ^ (x)[c],  7);      \
    } while(0)
|
|
|
|
|
|
|
|
#define CC20_DOUBLE_ROUND(s) \ |
|
|
#define CC20_DOUBLE_ROUND(s) \ |
|
|
/* odd round */ \ |
|
|
/* odd round */ \ |
|
|
CC20_QUARTERROUND(s, 0, 4, 8, 12); \ |
|
|
CC20_QUARTERROUND(s, 0, 4, 8, 12); \ |
|
@ -272,6 +278,7 @@ static void cc20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { |
|
|
CC20_QUARTERROUND(s, 2, 7, 8, 13); \ |
|
|
CC20_QUARTERROUND(s, 2, 7, 8, 13); \ |
|
|
CC20_QUARTERROUND(s, 3, 4, 9, 14) |
|
|
CC20_QUARTERROUND(s, 3, 4, 9, 14) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void cc20_block_next(cc20_context_t *ctx) { |
|
|
static void cc20_block_next(cc20_context_t *ctx) { |
|
|
|
|
|
|
|
|
uint32_t *counter = ctx->state + 12; |
|
|
uint32_t *counter = ctx->state + 12; |
|
@ -344,7 +351,6 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
cc20_init_context(ctx, iv); |
|
|
cc20_init_context(ctx, iv); |
|
|
|
|
|
|
|
|
while(in_len >= 64) { |
|
|
while(in_len >= 64) { |
|
|
|
|
|
|
|
|
cc20_block_next(ctx); |
|
|
cc20_block_next(ctx); |
|
|
|
|
|
|
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 0]; in_p++; out_p++; |
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 0]; in_p++; out_p++; |
|
@ -363,11 +369,11 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[13]; in_p++; out_p++; |
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[13]; in_p++; out_p++; |
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[14]; in_p++; out_p++; |
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[14]; in_p++; out_p++; |
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[15]; in_p++; out_p++; |
|
|
*(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[15]; in_p++; out_p++; |
|
|
|
|
|
|
|
|
in_len -= 64; |
|
|
in_len -= 64; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
if(in_len > 0) { |
|
|
if(in_len > 0) { |
|
|
|
|
|
|
|
|
cc20_block_next(ctx); |
|
|
cc20_block_next(ctx); |
|
|
|
|
|
|
|
|
tmp_len -= in_len; |
|
|
tmp_len -= in_len; |
|
@ -382,14 +388,14 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif // openSSL 1.1, plain C ------------------------------------------------------------
|
|
|
#endif // openSSL 1.1, plain C ------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int cc20_init (const unsigned char *key, cc20_context_t **ctx) { |
|
|
int cc20_init (const unsigned char *key, cc20_context_t **ctx) { |
|
|
|
|
|
|
|
|
// allocate context...
|
|
|
// allocate context...
|
|
|
*ctx = (cc20_context_t*) calloc(1, sizeof(cc20_context_t)); |
|
|
*ctx = (cc20_context_t*)calloc(1, sizeof(cc20_context_t)); |
|
|
if (!(*ctx)) |
|
|
if(!(*ctx)) |
|
|
return -1; |
|
|
return -1; |
|
|
#if defined (HAVE_OPENSSL_1_1) |
|
|
#if defined (HAVE_OPENSSL_1_1) |
|
|
if(!((*ctx)->ctx = EVP_CIPHER_CTX_new())) { |
|
|
if(!((*ctx)->ctx = EVP_CIPHER_CTX_new())) { |
|
@ -409,7 +415,7 @@ int cc20_init (const unsigned char *key, cc20_context_t **ctx) { |
|
|
// release what is attached to a cc20 context: with openSSL 1.1 support
// compiled in, the EVP cipher context gets freed; the context structure
// itself is not freed here
// always returns 0
int cc20_deinit (cc20_context_t *ctx) {

    int rc = 0;

#if defined (HAVE_OPENSSL_1_1)
    // only hand back a cipher context that actually is present
    if(ctx->ctx != NULL) {
        EVP_CIPHER_CTX_free(ctx->ctx);
    }
#endif

    return rc;
}
|
|