From 399ecf8801f1039d7f875a9e4649e7effde94906 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Tue, 1 Sep 2020 15:13:25 +0545 Subject: [PATCH 01/10] reworked cc20 --- include/cc20.h | 57 +++++++++++++ include/n2n.h | 1 + src/cc20.c | 110 +++++++++++++++++++++++++ src/transform_cc20.c | 186 ++++++++++--------------------------------- 4 files changed, 212 insertions(+), 142 deletions(-) create mode 100644 include/cc20.h create mode 100644 src/cc20.c diff --git a/include/cc20.h b/include/cc20.h new file mode 100644 index 0000000..1bc52eb --- /dev/null +++ b/include/cc20.h @@ -0,0 +1,57 @@ +/** + * (C) 2007-20 - ntop.org and contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not see see + * + */ + + +#ifndef CC20_H +#define CC20_H + +#include "n2n.h" // HAVE_OPENSSL_1_1, traceEvent ... + + +#ifdef HAVE_OPENSSL_1_1 + + +#include +#include +#include + +#define CC20_IV_SIZE 16 +#define CC20_KEY_BYTES (256/8) + + +typedef struct cc20_context_t { + EVP_CIPHER_CTX *ctx; /* openssl's reusable evp_* en/de-cryption context */ + const EVP_CIPHER *cipher; /* cipher to use: e.g. EVP_chacha20() */ + uint8_t key[CC20_KEY_BYTES]; /* the pure key data for payload encryption & decryption */ +} cc20_context_t; + + +int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, + const unsigned char *iv, cc20_context_t *ctx); + + +int cc20_init (const unsigned char *key, cc20_context_t **ctx); + + +int cc20_deinit (cc20_context_t *ctx); + + +#endif // HAVE_OPENSSL_1_1 + + +#endif // CC20_H diff --git a/include/n2n.h b/include/n2n.h index dc08248..74b8798 100644 --- a/include/n2n.h +++ b/include/n2n.h @@ -160,6 +160,7 @@ typedef struct ether_hdr ether_hdr_t; #include "pearson.h" #include "portable_endian.h" #include "aes.h" +#include "cc20.h" #include "speck.h" #include "n2n_regex.h" diff --git a/src/cc20.c b/src/cc20.c new file mode 100644 index 0000000..87bb5e1 --- /dev/null +++ b/src/cc20.c @@ -0,0 +1,110 @@ +/** + * (C) 2007-20 - ntop.org and contributors + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not see see + * + */ + + +#include "cc20.h" + + +#ifdef HAVE_OPENSSL_1_1 + + +/* ****************************************************** */ + +/* get any erorr message out of openssl + taken from https://en.wikibooks.org/wiki/OpenSSL/Error_handling */ +static char *openssl_err_as_string (void) { + BIO *bio = BIO_new (BIO_s_mem ()); + ERR_print_errors (bio); + char *buf = NULL; + size_t len = BIO_get_mem_data (bio, &buf); + char *ret = (char *) calloc (1, 1 + len); + + if(ret) + memcpy (ret, buf, len); + + BIO_free (bio); + return ret; +} + +/* ****************************************************** */ + +// encryption == decryption +int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, + const unsigned char *iv, cc20_context_t *ctx) { + + int evp_len; + int evp_ciphertext_len; + + if(1 == EVP_EncryptInit_ex(ctx->ctx, ctx->cipher, NULL, ctx->key, iv)) { + if(1 == EVP_CIPHER_CTX_set_padding(ctx->ctx, 0)) { + if(1 == EVP_EncryptUpdate(ctx->ctx, out, &evp_len, in, in_len)) { + evp_ciphertext_len = evp_len; + if(1 == EVP_EncryptFinal_ex(ctx->ctx, out + evp_len, &evp_len)) { + evp_ciphertext_len += evp_len; + if(evp_ciphertext_len != in_len) + traceEvent(TRACE_ERROR, "cc20_crypt openssl encryption: encrypted %u bytes where %u were expected", + evp_ciphertext_len, in_len); + } else + traceEvent(TRACE_ERROR, "cc20_crypt openssl final encryption: %s", + openssl_err_as_string()); + } else + traceEvent(TRACE_ERROR, "cc20_encrypt openssl encrpytion: %s", + openssl_err_as_string()); + } else + traceEvent(TRACE_ERROR, "cc20_encrypt openssl padding setup: %s", + openssl_err_as_string()); + } else + traceEvent(TRACE_ERROR, "cc20_encrypt openssl init: %s", + openssl_err_as_string()); + + EVP_CIPHER_CTX_reset(ctx->ctx); + + return 0; +} + + +int cc20_init (const unsigned char *key, cc20_context_t **ctx) { + + // allocate context... + *ctx = (cc20_context_t*) calloc(1, sizeof(cc20_context_t)); + if (!(*ctx)) + return -1; + + if(!((*ctx)->ctx = EVP_CIPHER_CTX_new())) { + traceEvent(TRACE_ERROR, "cc20_init openssl's evp_* encryption context creation failed: %s", + openssl_err_as_string()); + return -1; + } + + (*ctx)->cipher = EVP_chacha20(); + + memcpy((*ctx)->key, key, CC20_KEY_BYTES); + + return 0; +} + + +int cc20_deinit (cc20_context_t *ctx) { + + if (ctx->ctx) EVP_CIPHER_CTX_free(ctx->ctx); + + return 0; +} + + +#endif // HAVE_OPENSSL_1_1 diff --git a/src/transform_cc20.c b/src/transform_cc20.c index 0665c3b..d3c42f3 100644 --- a/src/transform_cc20.c +++ b/src/transform_cc20.c @@ -20,33 +20,21 @@ #ifdef HAVE_OPENSSL_1_1 -#include -#include -#include - -#define N2N_CC20_IVEC_SIZE 16 - -#define CC20_KEY_BYTES (256/8) - /* ChaCha20 plaintext preamble */ -#define TRANSOP_CC20_PREAMBLE_SIZE (N2N_CC20_IVEC_SIZE) - -typedef unsigned char n2n_cc20_ivec_t[N2N_CC20_IVEC_SIZE]; +#define CC20_PREAMBLE_SIZE (CC20_IV_SIZE) typedef struct transop_cc20 { - EVP_CIPHER_CTX *enc_ctx; /* openssl's reusable evp_* encryption context */ - EVP_CIPHER_CTX *dec_ctx; /* openssl's reusable evp_* decryption context */ - const EVP_CIPHER *cipher; /* cipher to use: EVP_chacha20() */ - uint8_t key[32]; /* the pure key data for payload encryption & decryption */ + cc20_context_t *ctx; } transop_cc20_t; /* ****************************************************** */ static int transop_deinit_cc20(n2n_trans_op_t *arg) { + transop_cc20_t *priv = (transop_cc20_t *)arg->priv; - EVP_CIPHER_CTX_free(priv->enc_ctx); - EVP_CIPHER_CTX_free(priv->dec_ctx); + if(priv->ctx) + cc20_deinit(priv->ctx); if(priv) free(priv); @@ -56,35 +44,6 @@ static int transop_deinit_cc20(n2n_trans_op_t *arg) { /* ****************************************************** */ -/* get any erorr message out of openssl - taken from https://en.wikibooks.org/wiki/OpenSSL/Error_handling */ -static char *openssl_err_as_string (void) { - BIO *bio = BIO_new (BIO_s_mem ()); - ERR_print_errors (bio); - char *buf = NULL; - size_t len = BIO_get_mem_data (bio, &buf); - char *ret = (char *) calloc (1, 1 + len); - - if(ret) - memcpy (ret, buf, len); - - BIO_free (bio); - return ret; -} - -/* ****************************************************** */ - -static void set_cc20_iv(transop_cc20_t *priv, n2n_cc20_ivec_t ivec) { - // keep in mind the following condition: N2N_CC20_IVEC_SIZE % sizeof(rand_value) == 0 ! - uint64_t rand_value; - for (uint8_t i = 0; i < N2N_CC20_IVEC_SIZE; i += sizeof(rand_value)) { - rand_value = n2n_rand(); - memcpy(ivec + i, &rand_value, sizeof(rand_value)); - } -} - -/* ****************************************************** */ - /** The ChaCha20 packet format consists of: * * - a 128-bit random IV @@ -99,56 +58,30 @@ static int transop_encode_cc20(n2n_trans_op_t * arg, const uint8_t * inbuf, size_t in_len, const uint8_t * peer_mac) { + int len=-1; transop_cc20_t * priv = (transop_cc20_t *)arg->priv; uint8_t assembly[N2N_PKT_BUF_SIZE] = {0}; if(in_len <= N2N_PKT_BUF_SIZE) { - if((in_len + TRANSOP_CC20_PREAMBLE_SIZE) <= out_len) { + if((in_len + CC20_PREAMBLE_SIZE) <= out_len) { + size_t idx=0; - n2n_cc20_ivec_t enc_ivec = {0}; traceEvent(TRACE_DEBUG, "encode_cc20 %lu bytes", in_len); - /* Generate and encode the IV. */ - set_cc20_iv(priv, enc_ivec); - encode_buf(outbuf, &idx, &enc_ivec, N2N_CC20_IVEC_SIZE); + // full IV sized random value (128 bit) + encode_uint64(outbuf, &idx, n2n_rand()); + encode_uint64(outbuf, &idx, n2n_rand()); - /* Encrypt the assembly contents and write the ciphertext after the iv. */ - /* len is set to the length of the cipher plain text to be encrpyted - which is (in this case) identical to original packet lentgh */ len = in_len; + cc20_crypt(outbuf + CC20_PREAMBLE_SIZE, + inbuf, + in_len, + outbuf, // IV + priv->ctx); - /* The assembly buffer is a source for encrypting data. - * The whole contents of assembly are encrypted. */ - memcpy(assembly, inbuf, in_len); - - EVP_CIPHER_CTX *ctx = priv->enc_ctx; - int evp_len; - int evp_ciphertext_len; - - if(1 == EVP_EncryptInit_ex(ctx, priv->cipher, NULL, priv->key, enc_ivec)) { - if(1 == EVP_CIPHER_CTX_set_padding(ctx, 0)) { - if(1 == EVP_EncryptUpdate(ctx, outbuf + TRANSOP_CC20_PREAMBLE_SIZE, &evp_len, assembly, len)) { - evp_ciphertext_len = evp_len; - if(1 == EVP_EncryptFinal_ex(ctx, outbuf + TRANSOP_CC20_PREAMBLE_SIZE + evp_len, &evp_len)) { - evp_ciphertext_len += evp_len; - - if(evp_ciphertext_len != len) - traceEvent(TRACE_ERROR, "encode_cc20 openssl encryption: encrypted %u bytes where %u were expected.\n", - evp_ciphertext_len, len); - } else - traceEvent(TRACE_ERROR, "encode_cc20 openssl final encryption: %s\n", openssl_err_as_string()); - } else - traceEvent(TRACE_ERROR, "encode_cc20 openssl encrpytion: %s\n", openssl_err_as_string()); - } else - traceEvent(TRACE_ERROR, "encode_cc20 openssl padding setup: %s\n", openssl_err_as_string()); - } else - traceEvent(TRACE_ERROR, "encode_cc20 openssl init: %s\n", openssl_err_as_string()); - - EVP_CIPHER_CTX_reset(ctx); - - len += TRANSOP_CC20_PREAMBLE_SIZE; /* size of data carried in UDP. */ + len += CC20_PREAMBLE_SIZE; /* size of data carried in UDP. */ } else traceEvent(TRACE_ERROR, "encode_cc20 outbuf too small."); } else @@ -166,72 +99,51 @@ static int transop_decode_cc20(n2n_trans_op_t * arg, const uint8_t * inbuf, size_t in_len, const uint8_t * peer_mac) { + int len=0; transop_cc20_t * priv = (transop_cc20_t *)arg->priv; uint8_t assembly[N2N_PKT_BUF_SIZE]; - if(((in_len - TRANSOP_CC20_PREAMBLE_SIZE) <= N2N_PKT_BUF_SIZE) /* Cipher text fits in assembly */ - && (in_len >= TRANSOP_CC20_PREAMBLE_SIZE) /* Has at least iv */ + if(((in_len - CC20_PREAMBLE_SIZE) <= N2N_PKT_BUF_SIZE) /* Cipher text fits in assembly */ + && (in_len >= CC20_PREAMBLE_SIZE) /* Has at least iv */ ) { size_t rem=in_len; size_t idx=0; - n2n_cc20_ivec_t dec_ivec = {0}; - - traceEvent(TRACE_DEBUG, "decode_cc20 %lu bytes", in_len); - len = (in_len - TRANSOP_CC20_PREAMBLE_SIZE); - - /* Get the IV */ - decode_buf((uint8_t *)&dec_ivec, N2N_CC20_IVEC_SIZE, inbuf, &rem, &idx); - - EVP_CIPHER_CTX *ctx = priv->dec_ctx; - int evp_len; - int evp_plaintext_len; - - if(1 == EVP_DecryptInit_ex(ctx, priv->cipher, NULL, priv->key, dec_ivec)) { - if(1 == EVP_CIPHER_CTX_set_padding(ctx, 0)) { - if(1 == EVP_DecryptUpdate(ctx, assembly, &evp_len, inbuf + TRANSOP_CC20_PREAMBLE_SIZE, len)) { - evp_plaintext_len = evp_len; - if(1 == EVP_DecryptFinal_ex(ctx, assembly + evp_len, &evp_len)) { - evp_plaintext_len += evp_len; - - if(evp_plaintext_len != len) - traceEvent(TRACE_ERROR, "decode_cc20 openssl decryption: decrypted %u bytes where %u were expected.\n", - evp_plaintext_len, len); - } else - traceEvent(TRACE_ERROR, "decode_cc20 openssl final decryption: %s\n", openssl_err_as_string()); - } else - traceEvent(TRACE_ERROR, "decode_cc20 openssl decrpytion: %s\n", openssl_err_as_string()); - } else - traceEvent(TRACE_ERROR, "decode_cc20 openssl padding setup: %s\n", openssl_err_as_string()); - } else - traceEvent(TRACE_ERROR, "decode_cc20 openssl init: %s\n", openssl_err_as_string()); - - EVP_CIPHER_CTX_reset(ctx); - - memcpy(outbuf, assembly, len); + + traceEvent(TRACE_DEBUG, "decode_cc20 %lu bytes", in_len); + + len = (in_len - CC20_PREAMBLE_SIZE); + + cc20_crypt(outbuf, + inbuf + CC20_PREAMBLE_SIZE, + in_len, + inbuf, // IV + priv->ctx); + } else - traceEvent(TRACE_ERROR, "decode_cc20 inbuf wrong size (%ul) to decrypt.", in_len); + traceEvent(TRACE_ERROR, "decode_cc20 inbuf wrong size (%ul) to decrypt.", in_len); return len; } /* ****************************************************** */ -static int setup_cc20_key(transop_cc20_t *priv, const uint8_t *key, ssize_t key_size) { - uint8_t key_mat_buf[SHA256_DIGEST_LENGTH]; +static int setup_cc20_key(transop_cc20_t *priv, const uint8_t *password, ssize_t password_len) { - priv->cipher = EVP_chacha20(); + uint8_t key_mat[CC20_KEY_BYTES]; - /* Clear out any old possibly longer key matter. */ - memset(&(priv->key), 0, sizeof(priv->key) ); /* The input key always gets hashed to make a more unpredictable and more complete use of the key space */ - SHA256(key, key_size, key_mat_buf); - memcpy (priv->key, key_mat_buf, SHA256_DIGEST_LENGTH); + pearson_hash_256(key_mat, password, password_len); + + if(cc20_init(key_mat, &(priv->ctx))) { + traceEvent(TRACE_ERROR, "setup_cc20_key setup unsuccessful"); + return -1; + } - traceEvent(TRACE_DEBUG, "ChaCha20 key setup completed\n"); + traceEvent(TRACE_DEBUG, "setup_cc20_key completed"); - return(0); + return 0; } /* ****************************************************** */ @@ -242,6 +154,7 @@ static void transop_tick_cc20(n2n_trans_op_t * arg, time_t now) { ; } /* ChaCha20 initialization function */ int n2n_transop_cc20_init(const n2n_edge_conf_t *conf, n2n_trans_op_t *ttt) { + transop_cc20_t *priv; const u_char *encrypt_key = (const u_char *)conf->encrypt_key; size_t encrypt_key_len = strlen(conf->encrypt_key); @@ -261,19 +174,8 @@ int n2n_transop_cc20_init(const n2n_edge_conf_t *conf, n2n_trans_op_t *ttt) { } ttt->priv = priv; - /* Setup openssl's reusable evp_* contexts for encryption and decryption*/ - if(!(priv->enc_ctx = EVP_CIPHER_CTX_new())) { - traceEvent(TRACE_ERROR, "openssl's evp_* encryption context creation: %s\n", openssl_err_as_string()); - return(-1); - } - - if(!(priv->dec_ctx = EVP_CIPHER_CTX_new())) { - traceEvent(TRACE_ERROR, "openssl's evp_* decryption context creation: %s\n", openssl_err_as_string()); - return(-1); - } - /* Setup the cipher and key */ - return(setup_cc20_key(priv, encrypt_key, encrypt_key_len)); + return setup_cc20_key(priv, encrypt_key, encrypt_key_len); } #endif /* HAVE_OPENSSL_1_1 */ From e087124dd9343fa291a6338766841be786fc9d22 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Tue, 1 Sep 2020 16:17:52 +0545 Subject: [PATCH 02/10] reworked cc20 --- CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 753e3c5..0ad8bba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,6 +97,7 @@ add_library(n2n STATIC src/wire.c src/minilzo.c src/tf.c + src/cc20.c src/transform_null.c src/transform_tf.c src/transform_aes.c From 8757312d412f1d43c5b9a133a9ceea045c290fdc Mon Sep 17 00:00:00 2001 From: Logan007 Date: Tue, 1 Sep 2020 20:09:29 +0545 Subject: [PATCH 03/10] reworked cc20 --- include/cc20.h | 29 ++++++---- include/n2n.h | 2 - src/cc20.c | 126 +++++++++++++++++++++++++++++++++++++++---- src/edge.c | 4 -- src/edge_utils.c | 2 - src/transform_cc20.c | 4 +- tools/benchmark.c | 8 --- 7 files changed, 136 insertions(+), 39 deletions(-) diff --git a/include/cc20.h b/include/cc20.h index 1bc52eb..9c0456e 100644 --- a/include/cc20.h +++ b/include/cc20.h @@ -20,26 +20,38 @@ #ifndef CC20_H #define CC20_H +#include #include "n2n.h" // HAVE_OPENSSL_1_1, traceEvent ... +#define CC20_IV_SIZE 16 +#define CC20_KEY_BYTES (256/8) -#ifdef HAVE_OPENSSL_1_1 - +#ifdef HAVE_OPENSSL_1_1 // openSSL 1.1 ---------------------------------------------------- -#include #include #include -#define CC20_IV_SIZE 16 -#define CC20_KEY_BYTES (256/8) - - typedef struct cc20_context_t { EVP_CIPHER_CTX *ctx; /* openssl's reusable evp_* en/de-cryption context */ const EVP_CIPHER *cipher; /* cipher to use: e.g. EVP_chacha20() */ uint8_t key[CC20_KEY_BYTES]; /* the pure key data for payload encryption & decryption */ } cc20_context_t; +#else // plain C -------------------------------------------------------------------------- + +typedef struct cc20_context { + uint32_t keystream32[16]; + size_t position; + + uint8_t key[CC20_KEY_BYTES]; + uint8_t nonce[CC20_IV_SIZE]; + uint64_t counter; + + uint32_t state[16]; +} cc20_context_t; + +#endif // openSSL 1.1, plain C ------------------------------------------------------------ + int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, const unsigned char *iv, cc20_context_t *ctx); @@ -51,7 +63,4 @@ int cc20_init (const unsigned char *key, cc20_context_t **ctx); int cc20_deinit (cc20_context_t *ctx); -#endif // HAVE_OPENSSL_1_1 - - #endif // CC20_H diff --git a/include/n2n.h b/include/n2n.h index 74b8798..18a109e 100644 --- a/include/n2n.h +++ b/include/n2n.h @@ -425,9 +425,7 @@ int n2n_transop_tf_init(const n2n_edge_conf_t *conf, n2n_trans_op_t *ttt); #ifdef N2N_HAVE_AES int n2n_transop_aes_cbc_init(const n2n_edge_conf_t *conf, n2n_trans_op_t *ttt); #endif -#ifdef HAVE_OPENSSL_1_1 int n2n_transop_cc20_init(const n2n_edge_conf_t *conf, n2n_trans_op_t *ttt); -#endif int n2n_transop_speck_init(const n2n_edge_conf_t *conf, n2n_trans_op_t *ttt); /* Log */ diff --git a/src/cc20.c b/src/cc20.c index 87bb5e1..7ba7ef7 100644 --- a/src/cc20.c +++ b/src/cc20.c @@ -20,11 +20,9 @@ #include "cc20.h" -#ifdef HAVE_OPENSSL_1_1 +#if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 --------------------------------------------- -/* ****************************************************** */ - /* get any erorr message out of openssl taken from https://en.wikibooks.org/wiki/OpenSSL/Error_handling */ static char *openssl_err_as_string (void) { @@ -41,7 +39,6 @@ static char *openssl_err_as_string (void) { return ret; } -/* ****************************************************** */ // encryption == decryption int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, @@ -78,13 +75,124 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, } +#else // plain C -------------------------------------------------------------------------- + + +// taken (and modified) from https://github.com/Ginurx/chacha20-c (public domain) + + +static uint32_t rotl32(uint32_t x, int n) { + + return (x << n) | (x >> (32 - n)); +} + +// little endian +static uint32_t pack4(const uint8_t *a) { + + uint32_t res = 0; + res |= (uint32_t)a[0] << 0 * 8; + res |= (uint32_t)a[1] << 1 * 8; + res |= (uint32_t)a[2] << 2 * 8; + res |= (uint32_t)a[3] << 3 * 8; + return res; +} + + +static void unpack4(uint32_t src, uint8_t *dst) { + + dst[0] = (src >> 0 * 8) & 0xff; + dst[1] = (src >> 1 * 8) & 0xff; + dst[2] = (src >> 2 * 8) & 0xff; + dst[3] = (src >> 3 * 8) & 0xff; +} + + +static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { + + const uint8_t *magic_constant = (uint8_t*)"expand 32-byte k"; + + memcpy(&(ctx->state[0]), magic_constant, 16); + memcpy (&(ctx->state[4]), ctx->key, CC20_KEY_BYTES); + memcpy(&(ctx->state[12]), nonce, CC20_IV_SIZE); +} + + +#define CHACHA20_QUARTERROUND(x, a, b, c, d) \ + x[a] += x[b]; x[d] = rotl32(x[d] ^ x[a], 16); \ + x[c] += x[d]; x[b] = rotl32(x[b] ^ x[c], 12); \ + x[a] += x[b]; x[d] = rotl32(x[d] ^ x[a], 8); \ + x[c] += x[d]; x[b] = rotl32(x[b] ^ x[c], 7); + +static void chacha20_block_next(cc20_context_t *ctx) { + + int i; + + for(i = 0; i < 16; i++) + ctx->keystream32[i] = ctx->state[i]; + + for(i = 0; i < 10; i++) { + CHACHA20_QUARTERROUND(ctx->keystream32, 0, 4, 8, 12) + CHACHA20_QUARTERROUND(ctx->keystream32, 1, 5, 9, 13) + CHACHA20_QUARTERROUND(ctx->keystream32, 2, 6, 10, 14) + CHACHA20_QUARTERROUND(ctx->keystream32, 3, 7, 11, 15) + CHACHA20_QUARTERROUND(ctx->keystream32, 0, 5, 10, 15) + CHACHA20_QUARTERROUND(ctx->keystream32, 1, 6, 11, 12) + CHACHA20_QUARTERROUND(ctx->keystream32, 2, 7, 8, 13) + CHACHA20_QUARTERROUND(ctx->keystream32, 3, 4, 9, 14) + } + + for(i = 0; i < 16; i++) + ctx->keystream32[i] += ctx->state[i]; + + uint32_t *counter = ctx->state + 12; + // increment counter + counter[0]++; + if(0 == counter[0]) { + // wrap around occured, increment higher 32 bits of counter + counter[1]++; + // Limited to 2^64 blocks of 64 bytes each. + // if you want to process more than 1180591620717411303424 bytes + // you have other problems. + // we could keep counting with counter[2] and counter[3] (nonce), + // but then we risk reusing the nonce which is very bad. + } +} + + +static void chacha20_init_context(cc20_context_t *ctx, const uint8_t *nonce) { + + chacha20_init_block(ctx, nonce); + ctx->position = 64; +} + + +int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, + const unsigned char *iv, cc20_context_t *ctx) { + + chacha20_init_context(ctx, iv); + + uint8_t *keystream8 = (uint8_t*)ctx->keystream32; + for(size_t i = 0; i < in_len; i++) { + if(ctx->position >= 64) { + chacha20_block_next(ctx); + ctx->position = 0; + } + out[i] = in[i] ^ keystream8[ctx->position]; + ctx->position++; + } +} + + +#endif // openSSL 1.1, plain C ------------------------------------------------------------ + + int cc20_init (const unsigned char *key, cc20_context_t **ctx) { // allocate context... *ctx = (cc20_context_t*) calloc(1, sizeof(cc20_context_t)); if (!(*ctx)) return -1; - +#if defined (HAVE_OPENSSL_1_1) if(!((*ctx)->ctx = EVP_CIPHER_CTX_new())) { traceEvent(TRACE_ERROR, "cc20_init openssl's evp_* encryption context creation failed: %s", openssl_err_as_string()); @@ -92,7 +200,7 @@ int cc20_init (const unsigned char *key, cc20_context_t **ctx) { } (*ctx)->cipher = EVP_chacha20(); - +#endif memcpy((*ctx)->key, key, CC20_KEY_BYTES); return 0; @@ -101,10 +209,8 @@ int cc20_init (const unsigned char *key, cc20_context_t **ctx) { int cc20_deinit (cc20_context_t *ctx) { +#if defined (HAVE_OPENSSL_1_1) if (ctx->ctx) EVP_CIPHER_CTX_free(ctx->ctx); - +#endif return 0; } - - -#endif // HAVE_OPENSSL_1_1 diff --git a/src/edge.c b/src/edge.c index 1f2423e..61ed709 100644 --- a/src/edge.c +++ b/src/edge.c @@ -168,9 +168,7 @@ static void help() { #ifdef N2N_HAVE_AES "-A3 or -A (deprecated) = AES-CBC, " #endif -#ifdef HAVE_OPENSSL_1_1 "-A4 = ChaCha20, " -#endif "-A5 = Speck-CTR.\n"); printf("-H | Enable full header encryption. Requires supernode with fixed community.\n"); printf("-z1 ... -z2 or -z | Enable compression for outgoing data packets: -z1 or -z = lzo1x" @@ -250,13 +248,11 @@ static void setPayloadEncryption( n2n_edge_conf_t *conf, int cipher) { break; } #endif -#ifdef HAVE_OPENSSL_1_1 case 4: { conf->transop_id = N2N_TRANSFORM_ID_CHACHA20; break; } -#endif case 5: { conf->transop_id = N2N_TRANSFORM_ID_SPECK; diff --git a/src/edge_utils.c b/src/edge_utils.c index f3d807e..07baca3 100644 --- a/src/edge_utils.c +++ b/src/edge_utils.c @@ -226,11 +226,9 @@ n2n_edge_t* edge_init(const n2n_edge_conf_t *conf, int *rv) { rc = n2n_transop_aes_cbc_init(&eee->conf, &eee->transop); break; #endif -#ifdef HAVE_OPENSSL_1_1 case N2N_TRANSFORM_ID_CHACHA20: rc = n2n_transop_cc20_init(&eee->conf, &eee->transop); break; -#endif case N2N_TRANSFORM_ID_SPECK: rc = n2n_transop_speck_init(&eee->conf, &eee->transop); break; diff --git a/src/transform_cc20.c b/src/transform_cc20.c index d3c42f3..26ffd3b 100644 --- a/src/transform_cc20.c +++ b/src/transform_cc20.c @@ -16,9 +16,9 @@ * */ + #include "n2n.h" -#ifdef HAVE_OPENSSL_1_1 /* ChaCha20 plaintext preamble */ #define CC20_PREAMBLE_SIZE (CC20_IV_SIZE) @@ -177,5 +177,3 @@ int n2n_transop_cc20_init(const n2n_edge_conf_t *conf, n2n_trans_op_t *ttt) { /* Setup the cipher and key */ return setup_cc20_key(priv, encrypt_key, encrypt_key_len); } - -#endif /* HAVE_OPENSSL_1_1 */ diff --git a/tools/benchmark.c b/tools/benchmark.c index 61f7ee7..48112c2 100644 --- a/tools/benchmark.c +++ b/tools/benchmark.c @@ -66,9 +66,7 @@ int main(int argc, char * argv[]) { #ifdef N2N_HAVE_AES n2n_trans_op_t transop_aes_cbc; #endif -#ifdef HAVE_OPENSSL_1_1 n2n_trans_op_t transop_cc20; -#endif n2n_trans_op_t transop_speck; n2n_edge_conf_t conf; @@ -86,9 +84,7 @@ int main(int argc, char * argv[]) { #ifdef N2N_HAVE_AES n2n_transop_aes_cbc_init(&conf, &transop_aes_cbc); #endif -#ifdef HAVE_OPENSSL_1_1 n2n_transop_cc20_init(&conf, &transop_cc20); -#endif n2n_transop_speck_init(&conf, &transop_speck); /* Run the tests */ @@ -97,9 +93,7 @@ int main(int argc, char * argv[]) { #ifdef N2N_HAVE_AES run_transop_benchmark("transop_aes", &transop_aes_cbc, &conf, pktbuf); #endif -#ifdef HAVE_OPENSSL_1_1 run_transop_benchmark("transop_cc20", &transop_cc20, &conf, pktbuf); -#endif run_transop_benchmark("transop_speck", &transop_speck, &conf, pktbuf); /* Cleanup */ @@ -108,9 +102,7 @@ int main(int argc, char * argv[]) { #ifdef N2N_HAVE_AES transop_aes_cbc.deinit(&transop_aes_cbc); #endif -#ifdef HAVE_OPENSSL_1_1 transop_cc20.deinit(&transop_cc20); -#endif transop_speck.deinit(&transop_speck); return 0; From 91ca25baa24749c1f93187147debacfcb9f34cd2 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Tue, 1 Sep 2020 22:25:20 +0545 Subject: [PATCH 04/10] reworked cc20 --- src/cc20.c | 59 +++++++++++++++++++----------------------------------- 1 file changed, 21 insertions(+), 38 deletions(-) diff --git a/src/cc20.c b/src/cc20.c index 7ba7ef7..2372db4 100644 --- a/src/cc20.c +++ b/src/cc20.c @@ -81,31 +81,6 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, // taken (and modified) from https://github.com/Ginurx/chacha20-c (public domain) -static uint32_t rotl32(uint32_t x, int n) { - - return (x << n) | (x >> (32 - n)); -} - -// little endian -static uint32_t pack4(const uint8_t *a) { - - uint32_t res = 0; - res |= (uint32_t)a[0] << 0 * 8; - res |= (uint32_t)a[1] << 1 * 8; - res |= (uint32_t)a[2] << 2 * 8; - res |= (uint32_t)a[3] << 3 * 8; - return res; -} - - -static void unpack4(uint32_t src, uint8_t *dst) { - - dst[0] = (src >> 0 * 8) & 0xff; - dst[1] = (src >> 1 * 8) & 0xff; - dst[2] = (src >> 2 * 8) & 0xff; - dst[3] = (src >> 3 * 8) & 0xff; -} - static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { @@ -116,12 +91,12 @@ static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { memcpy(&(ctx->state[12]), nonce, CC20_IV_SIZE); } - +#define ROL32(x,r) (((x)<<(r))|((x)>>(32-(r)))) #define CHACHA20_QUARTERROUND(x, a, b, c, d) \ - x[a] += x[b]; x[d] = rotl32(x[d] ^ x[a], 16); \ - x[c] += x[d]; x[b] = rotl32(x[b] ^ x[c], 12); \ - x[a] += x[b]; x[d] = rotl32(x[d] ^ x[a], 8); \ - x[c] += x[d]; x[b] = rotl32(x[b] ^ x[c], 7); + x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 16); \ + x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 12); \ + x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 8); \ + x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 7); static void chacha20_block_next(cc20_context_t *ctx) { @@ -145,16 +120,24 @@ static void chacha20_block_next(cc20_context_t *ctx) { ctx->keystream32[i] += ctx->state[i]; uint32_t *counter = ctx->state + 12; - // increment counter - counter[0]++; + // increment counter, make sure it is little endian in memory + uint32_t c = le32toh(counter[0]); + counter[0] = htole32(++c); if(0 == counter[0]) { // wrap around occured, increment higher 32 bits of counter - counter[1]++; - // Limited to 2^64 blocks of 64 bytes each. - // if you want to process more than 1180591620717411303424 bytes - // you have other problems. - // we could keep counting with counter[2] and counter[3] (nonce), - // but then we risk reusing the nonce which is very bad. + // unlikely with 1,500 byte sized packets + c = le32toh(counter[1]); + counter[1] = htole32(++c); + if(0 == counter[1]) { + // very unlikely + c = le32toh(counter[2]); + counter[2] = htole32(++c); + if(0 == counter[2]) { + // extremely unlikely + c = le32toh(counter[3]); + counter[3] = htole32(++c); + } + } } } From 87ee4d6fa7a29c4daa866f014787d0f8ed6935be Mon Sep 17 00:00:00 2001 From: Logan007 Date: Tue, 1 Sep 2020 22:50:52 +0545 Subject: [PATCH 05/10] reworked cc20 --- src/cc20.c | 36 +++++++++++++++++++++++------------- src/edge.c | 1 - tools/benchmark.c | 3 +-- 3 files changed, 24 insertions(+), 16 deletions(-) diff --git a/src/cc20.c b/src/cc20.c index 2372db4..8d13ffc 100644 --- a/src/cc20.c +++ b/src/cc20.c @@ -92,11 +92,20 @@ static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { } #define ROL32(x,r) (((x)<<(r))|((x)>>(32-(r)))) -#define CHACHA20_QUARTERROUND(x, a, b, c, d) \ +#define CHACHA20_QUARTERROUND(x, a, b, c, d) \ x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 16); \ x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 12); \ - x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 8); \ - x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 7); + x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 8); \ + x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 7) +#define CHACHA20_DOUBLE_ROUND \ + CHACHA20_QUARTERROUND(ctx->keystream32, 0, 4, 8, 12); \ + CHACHA20_QUARTERROUND(ctx->keystream32, 1, 5, 9, 13); \ + CHACHA20_QUARTERROUND(ctx->keystream32, 2, 6, 10, 14); \ + CHACHA20_QUARTERROUND(ctx->keystream32, 3, 7, 11, 15); \ + CHACHA20_QUARTERROUND(ctx->keystream32, 0, 5, 10, 15); \ + CHACHA20_QUARTERROUND(ctx->keystream32, 1, 6, 11, 12); \ + CHACHA20_QUARTERROUND(ctx->keystream32, 2, 7, 8, 13); \ + CHACHA20_QUARTERROUND(ctx->keystream32, 3, 4, 9, 14) static void chacha20_block_next(cc20_context_t *ctx) { @@ -105,16 +114,17 @@ static void chacha20_block_next(cc20_context_t *ctx) { for(i = 0; i < 16; i++) ctx->keystream32[i] = ctx->state[i]; - for(i = 0; i < 10; i++) { - CHACHA20_QUARTERROUND(ctx->keystream32, 0, 4, 8, 12) - CHACHA20_QUARTERROUND(ctx->keystream32, 1, 5, 9, 13) - CHACHA20_QUARTERROUND(ctx->keystream32, 2, 6, 10, 14) - CHACHA20_QUARTERROUND(ctx->keystream32, 3, 7, 11, 15) - CHACHA20_QUARTERROUND(ctx->keystream32, 0, 5, 10, 15) - CHACHA20_QUARTERROUND(ctx->keystream32, 1, 6, 11, 12) - CHACHA20_QUARTERROUND(ctx->keystream32, 2, 7, 8, 13) - CHACHA20_QUARTERROUND(ctx->keystream32, 3, 4, 9, 14) - } + // 10 double rounds + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; + CHACHA20_DOUBLE_ROUND; for(i = 0; i < 16; i++) ctx->keystream32[i] += ctx->state[i]; diff --git a/src/edge.c b/src/edge.c index 7e5ace5..22c8ff8 100644 --- a/src/edge.c +++ b/src/edge.c @@ -164,7 +164,6 @@ static void help() { printf("-r | Enable packet forwarding through n2n community.\n"); printf("-A1 | Disable payload encryption. Do not use with key (defaulting to Twofish then).\n"); printf("-A2 ... -A5 or -A | Choose a cipher for payload encryption, requires a key: -A2 = Twofish (default),\n"); - printf(" | " printf(" | -A3 or -A (deprecated) = AES, " "-A4 = ChaCha20, " "-A5 = Speck-CTR.\n"); diff --git a/tools/benchmark.c b/tools/benchmark.c index 8b631a4..62c7ca8 100644 --- a/tools/benchmark.c +++ b/tools/benchmark.c @@ -63,7 +63,6 @@ static void parseArgs(int argc, char * argv[]) { int main(int argc, char * argv[]) { uint8_t pktbuf[N2N_PKT_BUF_SIZE]; n2n_trans_op_t transop_null, transop_tf; - n2n_trans_op_t transop_aes_cbc; n2n_trans_op_t transop_aes; n2n_trans_op_t transop_cc20; @@ -80,7 +79,7 @@ int main(int argc, char * argv[]) { /* Init transopts */ n2n_transop_null_init(&conf, &transop_null); n2n_transop_tf_init(&conf, &transop_tf); - n2n_transop_aes_cbc_init(&conf, &transop_aes_cbc); + n2n_transop_aes_init(&conf, &transop_aes); n2n_transop_aes_init(&conf, &transop_aes); n2n_transop_cc20_init(&conf, &transop_cc20); n2n_transop_speck_init(&conf, &transop_speck); From 0267f1db7fc2b07e602240e4d913b3c480b0fe16 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Tue, 1 Sep 2020 22:55:53 +0545 Subject: [PATCH 06/10] reworked cc20 --- tools/benchmark.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/benchmark.c b/tools/benchmark.c index 62c7ca8..4a43c41 100644 --- a/tools/benchmark.c +++ b/tools/benchmark.c @@ -80,7 +80,6 @@ int main(int argc, char * argv[]) { n2n_transop_null_init(&conf, &transop_null); n2n_transop_tf_init(&conf, &transop_tf); n2n_transop_aes_init(&conf, &transop_aes); - n2n_transop_aes_init(&conf, &transop_aes); n2n_transop_cc20_init(&conf, &transop_cc20); n2n_transop_speck_init(&conf, &transop_speck); From f1f8ca609269173e3a895620fbd01da138ddaa87 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Wed, 2 Sep 2020 02:16:30 +0545 Subject: [PATCH 07/10] increased built-in cc20's speed --- include/cc20.h | 7 +-- src/cc20.c | 146 ++++++++++++++++++++++++++++++++++--------------- 2 files changed, 104 insertions(+), 49 deletions(-) diff --git a/include/cc20.h b/include/cc20.h index 9c0456e..9216d23 100644 --- a/include/cc20.h +++ b/include/cc20.h @@ -41,13 +41,8 @@ typedef struct cc20_context_t { typedef struct cc20_context { uint32_t keystream32[16]; - size_t position; - - uint8_t key[CC20_KEY_BYTES]; - uint8_t nonce[CC20_IV_SIZE]; - uint64_t counter; - uint32_t state[16]; + uint8_t key[CC20_KEY_BYTES]; } cc20_context_t; #endif // openSSL 1.1, plain C ------------------------------------------------------------ diff --git a/src/cc20.c b/src/cc20.c index 8d13ffc..ae7643e 100644 --- a/src/cc20.c +++ b/src/cc20.c @@ -81,57 +81,88 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, // taken (and modified) from https://github.com/Ginurx/chacha20-c (public domain) - static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { const uint8_t *magic_constant = (uint8_t*)"expand 32-byte k"; - memcpy(&(ctx->state[0]), magic_constant, 16); - memcpy (&(ctx->state[4]), ctx->key, CC20_KEY_BYTES); + memcpy(&(ctx->state[ 0]), magic_constant, 16); + memcpy(&(ctx->state[ 4]), ctx->key, CC20_KEY_BYTES); memcpy(&(ctx->state[12]), nonce, CC20_IV_SIZE); } + #define ROL32(x,r) (((x)<<(r))|((x)>>(32-(r)))) #define CHACHA20_QUARTERROUND(x, a, b, c, d) \ x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 16); \ x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 12); \ - x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 8); \ - x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 7) -#define CHACHA20_DOUBLE_ROUND \ - CHACHA20_QUARTERROUND(ctx->keystream32, 0, 4, 8, 12); \ - CHACHA20_QUARTERROUND(ctx->keystream32, 1, 5, 9, 13); \ - CHACHA20_QUARTERROUND(ctx->keystream32, 2, 6, 10, 14); \ - CHACHA20_QUARTERROUND(ctx->keystream32, 3, 7, 11, 15); \ - CHACHA20_QUARTERROUND(ctx->keystream32, 0, 5, 10, 15); \ - CHACHA20_QUARTERROUND(ctx->keystream32, 1, 6, 11, 12); \ - CHACHA20_QUARTERROUND(ctx->keystream32, 2, 7, 8, 13); \ - CHACHA20_QUARTERROUND(ctx->keystream32, 3, 4, 9, 14) + x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 8); \ + x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 7) +#define CHACHA20_DOUBLE_ROUND(s) \ + /* odd round */ \ + CHACHA20_QUARTERROUND(s, 0, 4, 8, 12); \ + CHACHA20_QUARTERROUND(s, 1, 5, 9, 13); \ + CHACHA20_QUARTERROUND(s, 2, 6, 10, 14); \ + CHACHA20_QUARTERROUND(s, 3, 7, 11, 15); \ + /* even round */ \ + CHACHA20_QUARTERROUND(s, 0, 5, 10, 15); \ + CHACHA20_QUARTERROUND(s, 1, 6, 11, 12); \ + CHACHA20_QUARTERROUND(s, 2, 7, 8, 13); \ + CHACHA20_QUARTERROUND(s, 3, 4, 9, 14) static void chacha20_block_next(cc20_context_t *ctx) { - int i; - - for(i = 0; i < 16; i++) - ctx->keystream32[i] = ctx->state[i]; + size_t i; + uint32_t *counter = ctx->state + 12; + uint32_t c; + + ctx->keystream32[ 0] = ctx->state[ 0]; + ctx->keystream32[ 1] = ctx->state[ 1]; + ctx->keystream32[ 2] = ctx->state[ 2]; + ctx->keystream32[ 3] = ctx->state[ 3]; + ctx->keystream32[ 4] = ctx->state[ 4]; + ctx->keystream32[ 5] = ctx->state[ 5]; + ctx->keystream32[ 6] = ctx->state[ 6]; + ctx->keystream32[ 7] = ctx->state[ 7]; + ctx->keystream32[ 8] = ctx->state[ 8]; + ctx->keystream32[ 9] = ctx->state[ 9]; + ctx->keystream32[10] = ctx->state[10]; + ctx->keystream32[11] = ctx->state[11]; + ctx->keystream32[12] = ctx->state[12]; + ctx->keystream32[13] = ctx->state[13]; + ctx->keystream32[14] = ctx->state[14]; + ctx->keystream32[15] = ctx->state[15]; // 10 double rounds - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - CHACHA20_DOUBLE_ROUND; - - for(i = 0; i < 16; i++) - ctx->keystream32[i] += ctx->state[i]; - - uint32_t *counter = ctx->state + 12; - // increment counter, make sure it is little endian in memory - uint32_t c = le32toh(counter[0]); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CHACHA20_DOUBLE_ROUND(ctx->keystream32); + + ctx->keystream32[ 0] += ctx->state[ 0]; + ctx->keystream32[ 1] += ctx->state[ 1]; + ctx->keystream32[ 2] += ctx->state[ 2]; + ctx->keystream32[ 3] += ctx->state[ 3]; + ctx->keystream32[ 4] += ctx->state[ 4]; + ctx->keystream32[ 5] += ctx->state[ 5]; + ctx->keystream32[ 6] += ctx->state[ 6]; + ctx->keystream32[ 7] += ctx->state[ 7]; + ctx->keystream32[ 8] += ctx->state[ 8]; + ctx->keystream32[ 9] += ctx->state[ 9]; + ctx->keystream32[10] += ctx->state[10]; + ctx->keystream32[11] += ctx->state[11]; + ctx->keystream32[12] += ctx->state[12]; + ctx->keystream32[13] += ctx->state[13]; + ctx->keystream32[14] += ctx->state[14]; + ctx->keystream32[15] += ctx->state[15]; + + // increment counter, make sure it is and stays little endian in memory + c = le32toh(counter[0]); counter[0] = htole32(++c); if(0 == counter[0]) { // wrap around occured, increment higher 32 bits of counter @@ -155,23 +186,52 @@ static void chacha20_block_next(cc20_context_t *ctx) { static void chacha20_init_context(cc20_context_t *ctx, const uint8_t *nonce) { chacha20_init_block(ctx, nonce); - ctx->position = 64; } int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, const unsigned char *iv, cc20_context_t *ctx) { + uint8_t *keystream8 = (uint8_t*)ctx->keystream32; + uint32_t * in_p = (uint32_t*)in; + uint32_t * out_p = (uint32_t*)out; + size_t tmp_len = in_len; + chacha20_init_context(ctx, iv); - uint8_t *keystream8 = (uint8_t*)ctx->keystream32; - for(size_t i = 0; i < in_len; i++) { - if(ctx->position >= 64) { - chacha20_block_next(ctx); - ctx->position = 0; + while(in_len >= 64) { + + chacha20_block_next(ctx); + + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 0]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 1]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 2]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 3]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 4]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 5]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 6]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 7]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 8]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 9]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[10]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[11]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[12]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[13]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[14]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[15]; in_p++; out_p++; + in_len -= 64; + } + + tmp_len = tmp_len - in_len; + if(in_len > 0) { + + chacha20_block_next(ctx); + + while(in_len > 0) { + out[tmp_len] = in[tmp_len] ^ keystream8[tmp_len%64]; + tmp_len++; + in_len--; } - out[i] = in[i] ^ keystream8[ctx->position]; - ctx->position++; } } From 5bdccb17fd902b11d1f8609e4cc306b1e978afe7 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Wed, 2 Sep 2020 02:24:24 +0545 Subject: [PATCH 08/10] increased built-in cc20's speed --- src/cc20.c | 1 - 1 file changed, 1 deletion(-) diff --git a/src/cc20.c b/src/cc20.c index ae7643e..e38640f 100644 --- a/src/cc20.c +++ b/src/cc20.c @@ -111,7 +111,6 @@ static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { static void chacha20_block_next(cc20_context_t *ctx) { - size_t i; uint32_t *counter = ctx->state + 12; uint32_t c; From cc7430ae66886cd87d56d4adca0e15483b216089 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Wed, 2 Sep 2020 02:33:05 +0545 Subject: [PATCH 09/10] increased built-in cc20's speed --- src/cc20.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cc20.c b/src/cc20.c index e38640f..61dd7b4 100644 --- a/src/cc20.c +++ b/src/cc20.c @@ -221,11 +221,11 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, in_len -= 64; } - tmp_len = tmp_len - in_len; if(in_len > 0) { chacha20_block_next(ctx); + tmp_len -= in_len; while(in_len > 0) { out[tmp_len] = in[tmp_len] ^ keystream8[tmp_len%64]; tmp_len++; From 885877cda39b4a3c7f789af30ec86240b1e386a9 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Wed, 2 Sep 2020 16:08:30 +0545 Subject: [PATCH 10/10] added sse to built-in cc20 --- include/cc20.h | 10 ++ src/cc20.c | 242 ++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 218 insertions(+), 34 deletions(-) diff --git a/include/cc20.h b/include/cc20.h index 9216d23..44ef07a 100644 --- a/include/cc20.h +++ b/include/cc20.h @@ -37,6 +37,16 @@ typedef struct cc20_context_t { uint8_t key[CC20_KEY_BYTES]; /* the pure key data for payload encryption & decryption */ } cc20_context_t; +#elif defined (__SSE2__) // SSE ---------------------------------------------------------- + +#include + +typedef struct cc20_context { + uint32_t keystream32[16]; + uint32_t state[16]; + uint8_t key[CC20_KEY_BYTES]; +} cc20_context_t; + #else // plain C -------------------------------------------------------------------------- typedef struct cc20_context { diff --git a/src/cc20.c b/src/cc20.c index 61dd7b4..8ed4b31 100644 --- a/src/cc20.c +++ b/src/cc20.c @@ -75,13 +75,187 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, } +#elif defined (__SSE2__) // SSE ---------------------------------------------------------- + + +// taken (and modified and enhanced) from +// https://github.com/Ginurx/chacha20-c (public domain) + + +static void cc20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { + + const uint8_t *magic_constant = (uint8_t*)"expand 32-byte k"; + + memcpy(&(ctx->state[ 0]), magic_constant, 16); + memcpy(&(ctx->state[ 4]), ctx->key, CC20_KEY_BYTES); + memcpy(&(ctx->state[12]), nonce, CC20_IV_SIZE); +} + + +#define SL _mm_slli_epi32 +#define SR _mm_srli_epi32 +#define XOR _mm_xor_si128 +#define AND _mm_and_si128 +#define ADD _mm_add_epi32 +#define ROL(X,r) (XOR(SL(X,r),SR(X,(32-r)))) + +#if defined (__SSE3__) // --- SSE3 +#define L8 _mm_set_epi32(0x0e0d0c0fL, 0x0a09080bL, 0x06050407L, 0x02010003L) +#define L16 _mm_set_epi32(0x0d0c0f0eL, 0x09080b0aL, 0x05040706L, 0x01000302L) +#define ROL8(X) ( _mm_shuffle_epi8(X, L8)) /* SSE 3 */ +#define ROL16(X) ( _mm_shuffle_epi8(X, L16)) /* SSE 3 */ +#else // --- regular SSE2 -------- +#define ROL8(X) ROL(X,8) +#define ROL16(X) ROL(X,16) +#endif // ------------------------ + +#define CC20_PERMUTE_ROWS(A,B,C,D) \ + B = _mm_shuffle_epi32(B, _MM_SHUFFLE(0, 3, 2, 1)); \ + C = _mm_shuffle_epi32(C, _MM_SHUFFLE(1, 0, 3, 2)); \ + D = _mm_shuffle_epi32(D, _MM_SHUFFLE(2, 1, 0, 3)) + +#define CC20_PERMUTE_ROWS_INV(A,B,C,D) \ + B = _mm_shuffle_epi32(B, _MM_SHUFFLE(2, 1, 0, 3)); \ + C = _mm_shuffle_epi32(C, _MM_SHUFFLE(1, 0, 3, 2)); \ + D = _mm_shuffle_epi32(D, _MM_SHUFFLE(0, 3, 2, 1)) + +#define CC20_ODD_ROUND(A,B,C,D) \ + /* odd round */ \ + A = ADD(A, B); D = ROL16(XOR(D, A)); \ + C = ADD(C, D); B = ROL(XOR(B, C), 12); \ + A = ADD(A, B); D = ROL8(XOR(D, A)); \ + C = ADD(C, D); B = ROL(XOR(B, C), 7) + +#define CC20_EVEN_ROUND(A,B,C,D) \ + CC20_PERMUTE_ROWS (A, B, C, D); \ + CC20_ODD_ROUND (A, B, C, D); \ + CC20_PERMUTE_ROWS_INV(A, B, C, D) + +#define CC20_DOUBLE_ROUND(A,B,C,D) \ + CC20_ODD_ROUND (A, B, C, D); \ + CC20_EVEN_ROUND(A, B, C, D) + +static void cc20_block_next(cc20_context_t *ctx) { + + uint32_t *counter = ctx->state + 12; + uint32_t cnt; + + __m128i a, b, c, d, k0, k1, k2, k3; + + a = _mm_loadu_si128 ((__m128i*)&(ctx->state[ 0])); + b = _mm_loadu_si128 ((__m128i*)&(ctx->state[ 4])); + c = _mm_loadu_si128 ((__m128i*)&(ctx->state[ 8])); + d = _mm_loadu_si128 ((__m128i*)&(ctx->state[12])); + + k0 = a; + k1 = b; + k2 = c; + k3 = d; + + // 10 double rounds + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + CC20_DOUBLE_ROUND(k0, k1, k2, k3); + + k0 = ADD(k0, a); + k1 = ADD(k1, b); + k2 = ADD(k2, c); + k3 = ADD(k3, d); + + _mm_storeu_si128 ((__m128i*)&(ctx->keystream32[ 0]), k0); + _mm_storeu_si128 ((__m128i*)&(ctx->keystream32[ 4]), k1); + _mm_storeu_si128 ((__m128i*)&(ctx->keystream32[ 8]), k2); + _mm_storeu_si128 ((__m128i*)&(ctx->keystream32[12]), k3); + + // increment counter, make sure it is and stays little endian in memory + cnt = le32toh(counter[0]); + counter[0] = htole32(++cnt); + if(0 == counter[0]) { + // wrap around occured, increment higher 32 bits of counter + // unlikely with 1,500 byte sized packets + cnt = le32toh(counter[1]); + counter[1] = htole32(++cnt); + if(0 == counter[1]) { + // very unlikely + cnt = le32toh(counter[2]); + counter[2] = htole32(++cnt); + if(0 == counter[2]) { + // extremely unlikely + cnt = le32toh(counter[3]); + counter[3] = htole32(++cnt); + } + } + } +} + + +static void cc20_init_context(cc20_context_t *ctx, const uint8_t *nonce) { + + cc20_init_block(ctx, nonce); +} + + +int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, + const unsigned char *iv, cc20_context_t *ctx) { + + uint8_t *keystream8 = (uint8_t*)ctx->keystream32; + uint32_t * in_p = (uint32_t*)in; + uint32_t * out_p = (uint32_t*)out; + size_t tmp_len = in_len; + + cc20_init_context(ctx, iv); + + while(in_len >= 64) { + + cc20_block_next(ctx); + + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 0]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 1]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 2]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 3]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 4]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 5]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 6]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 7]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 8]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 9]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[10]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[11]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[12]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[13]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[14]; in_p++; out_p++; + *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[15]; in_p++; out_p++; + in_len -= 64; + } + + if(in_len > 0) { + + cc20_block_next(ctx); + + tmp_len -= in_len; + while(in_len > 0) { + out[tmp_len] = in[tmp_len] ^ keystream8[tmp_len%64]; + tmp_len++; + in_len--; + } + } +} + + #else // plain C -------------------------------------------------------------------------- // taken (and modified) from https://github.com/Ginurx/chacha20-c (public domain) -static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { +static void cc20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { const uint8_t *magic_constant = (uint8_t*)"expand 32-byte k"; @@ -92,24 +266,24 @@ static void chacha20_init_block(cc20_context_t *ctx, const uint8_t nonce[]) { #define ROL32(x,r) (((x)<<(r))|((x)>>(32-(r)))) -#define CHACHA20_QUARTERROUND(x, a, b, c, d) \ - x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 16); \ - x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 12); \ - x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 8); \ - x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 7) -#define CHACHA20_DOUBLE_ROUND(s) \ - /* odd round */ \ - CHACHA20_QUARTERROUND(s, 0, 4, 8, 12); \ - CHACHA20_QUARTERROUND(s, 1, 5, 9, 13); \ - CHACHA20_QUARTERROUND(s, 2, 6, 10, 14); \ - CHACHA20_QUARTERROUND(s, 3, 7, 11, 15); \ - /* even round */ \ - CHACHA20_QUARTERROUND(s, 0, 5, 10, 15); \ - CHACHA20_QUARTERROUND(s, 1, 6, 11, 12); \ - CHACHA20_QUARTERROUND(s, 2, 7, 8, 13); \ - CHACHA20_QUARTERROUND(s, 3, 4, 9, 14) - -static void chacha20_block_next(cc20_context_t *ctx) { +#define CC20_QUARTERROUND(x, a, b, c, d) \ + x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 16); \ + x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 12); \ + x[a] += x[b]; x[d] = ROL32(x[d] ^ x[a], 8); \ + x[c] += x[d]; x[b] = ROL32(x[b] ^ x[c], 7) +#define CC20_DOUBLE_ROUND(s) \ + /* odd round */ \ + CC20_QUARTERROUND(s, 0, 4, 8, 12); \ + CC20_QUARTERROUND(s, 1, 5, 9, 13); \ + CC20_QUARTERROUND(s, 2, 6, 10, 14); \ + CC20_QUARTERROUND(s, 3, 7, 11, 15); \ + /* even round */ \ + CC20_QUARTERROUND(s, 0, 5, 10, 15); \ + CC20_QUARTERROUND(s, 1, 6, 11, 12); \ + CC20_QUARTERROUND(s, 2, 7, 8, 13); \ + CC20_QUARTERROUND(s, 3, 4, 9, 14) + +static void cc20_block_next(cc20_context_t *ctx) { uint32_t *counter = ctx->state + 12; uint32_t c; @@ -132,16 +306,16 @@ static void chacha20_block_next(cc20_context_t *ctx) { ctx->keystream32[15] = ctx->state[15]; // 10 double rounds - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); - CHACHA20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); + CC20_DOUBLE_ROUND(ctx->keystream32); ctx->keystream32[ 0] += ctx->state[ 0]; ctx->keystream32[ 1] += ctx->state[ 1]; @@ -182,9 +356,9 @@ static void chacha20_block_next(cc20_context_t *ctx) { } -static void chacha20_init_context(cc20_context_t *ctx, const uint8_t *nonce) { +static void cc20_init_context(cc20_context_t *ctx, const uint8_t *nonce) { - chacha20_init_block(ctx, nonce); + cc20_init_block(ctx, nonce); } @@ -196,11 +370,11 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, uint32_t * out_p = (uint32_t*)out; size_t tmp_len = in_len; - chacha20_init_context(ctx, iv); + cc20_init_context(ctx, iv); while(in_len >= 64) { - chacha20_block_next(ctx); + cc20_block_next(ctx); *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 0]; in_p++; out_p++; *(uint32_t*)out_p = *(uint32_t*)in_p ^ ctx->keystream32[ 1]; in_p++; out_p++; @@ -223,7 +397,7 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, if(in_len > 0) { - chacha20_block_next(ctx); + cc20_block_next(ctx); tmp_len -= in_len; while(in_len > 0) {