Browse Source

readability code clean-up (#529)

pull/536/head
Logan oos Even 4 years ago
committed by GitHub
parent
commit
3252231ecb
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
  1. 8
      include/aes.h
  2. 323
      src/aes.c

8
include/aes.h

@ -37,7 +37,7 @@
#define AES128_KEY_BYTES (128/8) #define AES128_KEY_BYTES (128/8)
#if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 --------------------------------------------- #if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------------------------------
#include <openssl/aes.h> #include <openssl/aes.h>
#include <openssl/evp.h> #include <openssl/evp.h>
@ -51,7 +51,7 @@ typedef struct aes_context_t {
AES_KEY ecb_dec_key; /* one step ecb decryption key */ AES_KEY ecb_dec_key; /* one step ecb decryption key */
} aes_context_t; } aes_context_t;
#elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI --------------------------- #elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI ---------------------------------------------------
#include <immintrin.h> #include <immintrin.h>
@ -61,7 +61,7 @@ typedef struct aes_context_t {
int Nr; int Nr;
} aes_context_t; } aes_context_t;
#else // plain C -------------------------------------------------------------------------- #else // plain C --------------------------------------------------------------------------------------------------
typedef struct aes_context_t { typedef struct aes_context_t {
uint32_t enc_rk[60]; // round keys for encryption uint32_t enc_rk[60]; // round keys for encryption
@ -69,7 +69,7 @@ typedef struct aes_context_t {
int Nr; // number of rounds int Nr; // number of rounds
} aes_context_t; } aes_context_t;
#endif // --------------------------------------------------------------------------------- #endif // ---------------------------------------------------------------------------------------------------------
int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,

323
src/aes.c

@ -20,7 +20,7 @@
#include "n2n.h" #include "n2n.h"
#if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 --------------------------------------------- #if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------------------------------
// get any error message out of openssl // get any error message out of openssl
@ -37,6 +37,7 @@ static char *openssl_err_as_string (void) {
memcpy (ret, buf, len); memcpy (ret, buf, len);
BIO_free (bio); BIO_free (bio);
return ret; return ret;
} }
@ -121,9 +122,10 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
// allocate context... // allocate context...
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t)); *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
if (!(*ctx)) if(!(*ctx))
return -1; return -1;
// ...and fill her up
// ...and fill her up:
// initialize data structures // initialize data structures
if(!((*ctx)->enc_ctx = EVP_CIPHER_CTX_new())) { if(!((*ctx)->enc_ctx = EVP_CIPHER_CTX_new())) {
@ -131,6 +133,7 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
openssl_err_as_string()); openssl_err_as_string());
return -1; return -1;
} }
if(!((*ctx)->dec_ctx = EVP_CIPHER_CTX_new())) { if(!((*ctx)->dec_ctx = EVP_CIPHER_CTX_new())) {
traceEvent(TRACE_ERROR, "aes_init openssl's evp_* decryption context creation failed: %s", traceEvent(TRACE_ERROR, "aes_init openssl's evp_* decryption context creation failed: %s",
openssl_err_as_string()); openssl_err_as_string());
@ -161,7 +164,7 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
} }
#elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI --------------------------- #elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI ---------------------------------------------------
// inspired by https://gist.github.com/acapola/d5b940da024080dfaf5f // inspired by https://gist.github.com/acapola/d5b940da024080dfaf5f
@ -172,11 +175,15 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
static __m128i aes128_keyexpand(__m128i key, __m128i keygened, uint8_t shuf) { static __m128i aes128_keyexpand(__m128i key, __m128i keygened, uint8_t shuf) {
key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); key = _mm_xor_si128(key, _mm_slli_si128(key, 4));
// unfortunately, shuffle expects immediate argument ... macrorize???!!!
switch (shuf) { // unfortunately, shuffle expects immediate argument, thus the not-so-stylish switch ...
// REVISIT: either macrorize this whole function (and perhaps the following one) or
// use shuffle_epi8 (which would require SSSE3 instead of SSE2)
switch(shuf) {
case 0x55: case 0x55:
keygened = _mm_shuffle_epi32(keygened, 0x55 ); keygened = _mm_shuffle_epi32(keygened, 0x55 );
break; break;
@ -189,23 +196,25 @@ static __m128i aes128_keyexpand(__m128i key, __m128i keygened, uint8_t shuf) {
default: default:
break; break;
} }
return _mm_xor_si128(key, keygened); return _mm_xor_si128(key, keygened);
} }
static __m128i aes192_keyexpand_2(__m128i key, __m128i key2) static __m128i aes192_keyexpand_2(__m128i key, __m128i key2) {
{
key = _mm_shuffle_epi32(key, 0xff); key = _mm_shuffle_epi32(key, 0xff);
key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4)); key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4));
return _mm_xor_si128(key, key2); return _mm_xor_si128(key, key2);
} }
#define KEYEXP128(K, I) aes128_keyexpand(K, _mm_aeskeygenassist_si128(K, I), 0xff) #define KEYEXP128(K, I) aes128_keyexpand (K, _mm_aeskeygenassist_si128(K, I), 0xff)
#define KEYEXP192(K1, K2, I) aes128_keyexpand(K1, _mm_aeskeygenassist_si128(K2, I), 0x55) #define KEYEXP192(K1, K2, I) aes128_keyexpand (K1, _mm_aeskeygenassist_si128(K2, I), 0x55)
#define KEYEXP192_2(K1, K2) aes192_keyexpand_2(K1, K2) #define KEYEXP192_2(K1, K2) aes192_keyexpand_2(K1, K2)
#define KEYEXP256(K1, K2, I) aes128_keyexpand(K1, _mm_aeskeygenassist_si128(K2, I), 0xff) #define KEYEXP256(K1, K2, I) aes128_keyexpand (K1, _mm_aeskeygenassist_si128(K2, I), 0xff)
#define KEYEXP256_2(K1, K2) aes128_keyexpand(K1, _mm_aeskeygenassist_si128(K2, 0x00), 0xaa) #define KEYEXP256_2(K1, K2) aes128_keyexpand (K1, _mm_aeskeygenassist_si128(K2, 0x00), 0xaa)
// key setup // key setup
@ -215,61 +224,69 @@ static int aes_internal_key_setup (aes_context_t *ctx, const uint8_t *key, int k
ctx->Nr = 6 + (key_bits / 32); ctx->Nr = 6 + (key_bits / 32);
// encryption keys // encryption keys
switch (key_bits) { switch(key_bits) {
case 128: { case 128: {
ctx->rk_enc[0] = _mm_loadu_si128((const __m128i*)key); ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*)key);
ctx->rk_enc[1] = KEYEXP128(ctx->rk_enc[0], 0x01); ctx->rk_enc[ 1] = KEYEXP128(ctx->rk_enc[0], 0x01);
ctx->rk_enc[2] = KEYEXP128(ctx->rk_enc[1], 0x02); ctx->rk_enc[ 2] = KEYEXP128(ctx->rk_enc[1], 0x02);
ctx->rk_enc[3] = KEYEXP128(ctx->rk_enc[2], 0x04); ctx->rk_enc[ 3] = KEYEXP128(ctx->rk_enc[2], 0x04);
ctx->rk_enc[4] = KEYEXP128(ctx->rk_enc[3], 0x08); ctx->rk_enc[ 4] = KEYEXP128(ctx->rk_enc[3], 0x08);
ctx->rk_enc[5] = KEYEXP128(ctx->rk_enc[4], 0x10); ctx->rk_enc[ 5] = KEYEXP128(ctx->rk_enc[4], 0x10);
ctx->rk_enc[6] = KEYEXP128(ctx->rk_enc[5], 0x20); ctx->rk_enc[ 6] = KEYEXP128(ctx->rk_enc[5], 0x20);
ctx->rk_enc[7] = KEYEXP128(ctx->rk_enc[6], 0x40); ctx->rk_enc[ 7] = KEYEXP128(ctx->rk_enc[6], 0x40);
ctx->rk_enc[8] = KEYEXP128(ctx->rk_enc[7], 0x80); ctx->rk_enc[ 8] = KEYEXP128(ctx->rk_enc[7], 0x80);
ctx->rk_enc[9] = KEYEXP128(ctx->rk_enc[8], 0x1B); ctx->rk_enc[ 9] = KEYEXP128(ctx->rk_enc[8], 0x1B);
ctx->rk_enc[10] = KEYEXP128(ctx->rk_enc[9], 0x36); ctx->rk_enc[10] = KEYEXP128(ctx->rk_enc[9], 0x36);
break; break;
} }
case 192: { case 192: {
__m128i temp[2]; __m128i temp[2];
ctx->rk_enc[0] = _mm_loadu_si128((const __m128i*) key); ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key);
ctx->rk_enc[1] = _mm_loadu_si128((const __m128i*) (key+16));
ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16));
temp[0] = KEYEXP192(ctx->rk_enc[0], ctx->rk_enc[1], 0x01); temp[0] = KEYEXP192(ctx->rk_enc[0], ctx->rk_enc[1], 0x01);
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[1]); temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[1]);
ctx->rk_enc[1] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[1], (__m128d)temp[0], 0); ctx->rk_enc[ 1] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[1], (__m128d)temp[0], 0);
ctx->rk_enc[2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
ctx->rk_enc[3] = KEYEXP192(temp[0], temp[1], 0x02); ctx->rk_enc[ 2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
ctx->rk_enc[4] = KEYEXP192_2(ctx->rk_enc[3], temp[1]); ctx->rk_enc[ 3] = KEYEXP192(temp[0], temp[1], 0x02);
ctx->rk_enc[ 4] = KEYEXP192_2(ctx->rk_enc[3], temp[1]);
temp[0] = KEYEXP192(ctx->rk_enc[3], ctx->rk_enc[4], 0x04); temp[0] = KEYEXP192(ctx->rk_enc[3], ctx->rk_enc[4], 0x04);
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[4]); temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[4]);
ctx->rk_enc[4] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[4], (__m128d)temp[0], 0); ctx->rk_enc[ 4] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[4], (__m128d)temp[0], 0);
ctx->rk_enc[5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
ctx->rk_enc[6] = KEYEXP192(temp[0], temp[1], 0x08); ctx->rk_enc[ 5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
ctx->rk_enc[7] = KEYEXP192_2(ctx->rk_enc[6], temp[1]); ctx->rk_enc[ 6] = KEYEXP192(temp[0], temp[1], 0x08);
ctx->rk_enc[ 7] = KEYEXP192_2(ctx->rk_enc[6], temp[1]);
temp[0] = KEYEXP192(ctx->rk_enc[6], ctx->rk_enc[7], 0x10); temp[0] = KEYEXP192(ctx->rk_enc[6], ctx->rk_enc[7], 0x10);
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[7]); temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[7]);
ctx->rk_enc[7] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[7], (__m128d)temp[0], 0); ctx->rk_enc[ 7] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[7], (__m128d)temp[0], 0);
ctx->rk_enc[8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
ctx->rk_enc[9] = KEYEXP192(temp[0], temp[1], 0x20); ctx->rk_enc[ 8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1);
ctx->rk_enc[ 9] = KEYEXP192(temp[0], temp[1], 0x20);
ctx->rk_enc[10] = KEYEXP192_2(ctx->rk_enc[9], temp[1]); ctx->rk_enc[10] = KEYEXP192_2(ctx->rk_enc[9], temp[1]);
temp[0] = KEYEXP192(ctx->rk_enc[9], ctx->rk_enc[10], 0x40); temp[0] = KEYEXP192(ctx->rk_enc[9], ctx->rk_enc[10], 0x40);
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[10]); temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[10]);
ctx->rk_enc[10] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[10], (__m128d) temp[0], 0); ctx->rk_enc[10] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[10], (__m128d) temp[0], 0);
ctx->rk_enc[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1); ctx->rk_enc[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1);
ctx->rk_enc[12] = KEYEXP192(temp[0], temp[1], 0x80); ctx->rk_enc[12] = KEYEXP192(temp[0], temp[1], 0x80);
break; break;
} }
case 256: { case 256: {
ctx->rk_enc[0] = _mm_loadu_si128((const __m128i*) key); ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key);
ctx->rk_enc[1] = _mm_loadu_si128((const __m128i*) (key+16)); ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16));
ctx->rk_enc[2] = KEYEXP256(ctx->rk_enc[0], ctx->rk_enc[1], 0x01); ctx->rk_enc[ 2] = KEYEXP256(ctx->rk_enc[0], ctx->rk_enc[1], 0x01);
ctx->rk_enc[3] = KEYEXP256_2(ctx->rk_enc[1], ctx->rk_enc[2]); ctx->rk_enc[ 3] = KEYEXP256_2(ctx->rk_enc[1], ctx->rk_enc[2]);
ctx->rk_enc[4] = KEYEXP256(ctx->rk_enc[2], ctx->rk_enc[3], 0x02); ctx->rk_enc[ 4] = KEYEXP256(ctx->rk_enc[2], ctx->rk_enc[3], 0x02);
ctx->rk_enc[5] = KEYEXP256_2(ctx->rk_enc[3], ctx->rk_enc[4]); ctx->rk_enc[ 5] = KEYEXP256_2(ctx->rk_enc[3], ctx->rk_enc[4]);
ctx->rk_enc[6] = KEYEXP256(ctx->rk_enc[4], ctx->rk_enc[5], 0x04); ctx->rk_enc[ 6] = KEYEXP256(ctx->rk_enc[4], ctx->rk_enc[5], 0x04);
ctx->rk_enc[7] = KEYEXP256_2(ctx->rk_enc[5], ctx->rk_enc[6]); ctx->rk_enc[ 7] = KEYEXP256_2(ctx->rk_enc[5], ctx->rk_enc[6]);
ctx->rk_enc[8] = KEYEXP256(ctx->rk_enc[6], ctx->rk_enc[7], 0x08); ctx->rk_enc[ 8] = KEYEXP256(ctx->rk_enc[6], ctx->rk_enc[7], 0x08);
ctx->rk_enc[9] = KEYEXP256_2(ctx->rk_enc[7], ctx->rk_enc[8]); ctx->rk_enc[ 9] = KEYEXP256_2(ctx->rk_enc[7], ctx->rk_enc[8]);
ctx->rk_enc[10] = KEYEXP256(ctx->rk_enc[8], ctx->rk_enc[9], 0x10); ctx->rk_enc[10] = KEYEXP256(ctx->rk_enc[8], ctx->rk_enc[9], 0x10);
ctx->rk_enc[11] = KEYEXP256_2(ctx->rk_enc[9], ctx->rk_enc[10]); ctx->rk_enc[11] = KEYEXP256_2(ctx->rk_enc[9], ctx->rk_enc[10]);
ctx->rk_enc[12] = KEYEXP256(ctx->rk_enc[10], ctx->rk_enc[11], 0x20); ctx->rk_enc[12] = KEYEXP256(ctx->rk_enc[10], ctx->rk_enc[11], 0x20);
@ -280,10 +297,10 @@ static int aes_internal_key_setup (aes_context_t *ctx, const uint8_t *key, int k
} }
// derive decryption keys // derive decryption keys
for (int i = 1; i < ctx->Nr; ++i) { for(int i = 1; i < ctx->Nr; ++i) {
ctx->rk_dec[ctx->Nr - i] = _mm_aesimc_si128(ctx->rk_enc[i]); ctx->rk_dec[ctx->Nr - i] = _mm_aesimc_si128(ctx->rk_enc[i]);
} }
ctx->rk_dec[0] = ctx->rk_enc[ctx->Nr]; ctx->rk_dec[ 0] = ctx->rk_enc[ctx->Nr];
return ctx->Nr; return ctx->Nr;
} }
@ -311,7 +328,7 @@ static void aes_internal_encrypt (aes_context_t *ctx, const uint8_t pt[16], uint
tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]); tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]);
} }
} }
tmp = _mm_aesenclast_si128(tmp, ctx->rk_enc[ctx->Nr]); tmp = _mm_aesenclast_si128 (tmp, ctx->rk_enc[ctx->Nr]);
_mm_storeu_si128((__m128i*) ct, tmp); _mm_storeu_si128((__m128i*) ct, tmp);
} }
@ -339,7 +356,7 @@ static void aes_internal_decrypt (aes_context_t *ctx, const uint8_t ct[16], uint
tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]); tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]);
} }
} }
tmp = _mm_aesdeclast_si128(tmp, ctx->rk_enc[0]); tmp = _mm_aesdeclast_si128 (tmp, ctx->rk_enc[ 0]);
_mm_storeu_si128((__m128i*) pt, tmp); _mm_storeu_si128((__m128i*) pt, tmp);
} }
@ -368,8 +385,8 @@ int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t
int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
const unsigned char *iv, aes_context_t *ctx) { const unsigned char *iv, aes_context_t *ctx) {
int n; // number of blocks int n; /* number of blocks */
int ret = (int)in_len & 15; // remainder int ret = (int)in_len & 15; /* remainder */
__m128i ivec = _mm_loadu_si128((__m128i*)iv); __m128i ivec = _mm_loadu_si128((__m128i*)iv);
@ -396,7 +413,7 @@ int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]); tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]);
} }
} }
tmp = _mm_aesenclast_si128(tmp, ctx->rk_enc[ctx->Nr]); tmp = _mm_aesenclast_si128 (tmp, ctx->rk_enc[ctx->Nr]);
ivec = tmp; ivec = tmp;
@ -411,11 +428,12 @@ int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
const unsigned char *iv, aes_context_t *ctx) { const unsigned char *iv, aes_context_t *ctx) {
int n; // number of blocks int n; /* number of blocks */
int ret = (int)in_len & 15; // remainder int ret = (int)in_len & 15; /* remainder */
__m128i ivec = _mm_loadu_si128((__m128i*)iv); __m128i ivec = _mm_loadu_si128((__m128i*)iv);
// 4 parallel rails of AES decryption to reduce data dependencies in x86's deep pipelines
for(n = in_len / 16; n > 3; n -=4) { for(n = in_len / 16; n > 3; n -=4) {
__m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16; __m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16;
__m128i tmp2 = _mm_loadu_si128((__m128i*)in); in += 16; __m128i tmp2 = _mm_loadu_si128((__m128i*)in); in += 16;
@ -484,9 +502,11 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
_mm_storeu_si128((__m128i*) out, tmp2); out += 16; _mm_storeu_si128((__m128i*) out, tmp2); out += 16;
_mm_storeu_si128((__m128i*) out, tmp3); out += 16; _mm_storeu_si128((__m128i*) out, tmp3); out += 16;
_mm_storeu_si128((__m128i*) out, tmp4); out += 16; _mm_storeu_si128((__m128i*) out, tmp4); out += 16;
} // now: less than 4 blocks remaining }
// now: less than 4 blocks remaining
if(n > 1) { // 2 or 3 blocks remaining --> this code handles two of them // if 2 or 3 blocks remaining --> this code handles two of them
if(n > 1) {
n-= 2; n-= 2;
__m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16; __m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16;
@ -513,7 +533,7 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[13]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[13]); tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[13]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[13]);
} }
} }
tmp1 = _mm_aesdeclast_si128(tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]); tmp1 = _mm_aesdeclast_si128 (tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]);
tmp1 = _mm_xor_si128 (tmp1, ivec); tmp2 = _mm_xor_si128 (tmp2, old_in1); tmp1 = _mm_xor_si128 (tmp1, ivec); tmp2 = _mm_xor_si128 (tmp2, old_in1);
@ -523,7 +543,8 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
_mm_storeu_si128((__m128i*) out, tmp2); out += 16; _mm_storeu_si128((__m128i*) out, tmp2); out += 16;
} }
if(n) { // one block remaining // one block remaining
if(n) {
__m128i tmp = _mm_loadu_si128((__m128i*)in); __m128i tmp = _mm_loadu_si128((__m128i*)in);
__m128i old_in = tmp; __m128i old_in = tmp;
@ -545,7 +566,7 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]); tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]);
} }
} }
tmp = _mm_aesdeclast_si128(tmp, ctx->rk_enc[ 0]); tmp = _mm_aesdeclast_si128 (tmp, ctx->rk_enc[ 0]);
tmp = _mm_xor_si128 (tmp, ivec); tmp = _mm_xor_si128 (tmp, ivec);
@ -560,9 +581,9 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
// allocate context... // allocate context...
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t)); *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
if (!(*ctx)) if(!(*ctx))
return -1; return -1;
// ...and fill her up // ...and fill her up:
// initialize data structures // initialize data structures
@ -581,12 +602,14 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
// key material handling // key material handling
aes_internal_key_setup ( *ctx, key, 8 * key_size); aes_internal_key_setup ( *ctx, key, 8 * key_size);
return 0; return 0;
} }
#else // plain C -------------------------------------------------------------------------- #else // plain C --------------------------------------------------------------------------
// rijndael-alg-fst.c version 3.0 (December 2000) // rijndael-alg-fst.c version 3.0 (December 2000)
// optimised ANSI C code for the Rijndael cipher (now AES) // optimised ANSI C code for the Rijndael cipher (now AES)
// original authors: Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be> // original authors: Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
@ -968,11 +991,8 @@ static const uint32_t rcon[] = {
#define m3(x) ((x) & 0xff000000) #define m3(x) ((x) & 0xff000000)
/** // expand the cipher key into the encryption key schedule and
* Expand the cipher key into the encryption key schedule. // return the number of rounds for the given cipher key size
*
* @return the number of rounds for the given cipher key size.
*/
static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) {
int i = 0; int i = 0;
@ -982,8 +1002,8 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
rk[1] = GETU32(cipherKey + 4); rk[1] = GETU32(cipherKey + 4);
rk[2] = GETU32(cipherKey + 8); rk[2] = GETU32(cipherKey + 8);
rk[3] = GETU32(cipherKey + 12); rk[3] = GETU32(cipherKey + 12);
if (keyBits == 128) { if(keyBits == 128) {
for (;;) { for(;;) {
temp = rk[3]; temp = rk[3];
rk[4] = rk[0] ^ rk[4] = rk[0] ^
(Te4[b2(temp)] & 0xff000000) ^ (Te4[b2(temp)] & 0xff000000) ^
@ -994,7 +1014,7 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
rk[5] = rk[1] ^ rk[4]; rk[5] = rk[1] ^ rk[4];
rk[6] = rk[2] ^ rk[5]; rk[6] = rk[2] ^ rk[5];
rk[7] = rk[3] ^ rk[6]; rk[7] = rk[3] ^ rk[6];
if (++i == 10) { if(++i == 10) {
return 10; return 10;
} }
rk += 4; rk += 4;
@ -1002,8 +1022,8 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
} }
rk[4] = GETU32(cipherKey + 16); rk[4] = GETU32(cipherKey + 16);
rk[5] = GETU32(cipherKey + 20); rk[5] = GETU32(cipherKey + 20);
if (keyBits == 192) { if(keyBits == 192) {
for (;;) { for(;;) {
temp = rk[ 5]; temp = rk[ 5];
rk[ 6] = rk[ 0] ^ rk[ 6] = rk[ 0] ^
(Te4[b2(temp)] & 0xff000000) ^ (Te4[b2(temp)] & 0xff000000) ^
@ -1014,7 +1034,7 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
rk[ 7] = rk[ 1] ^ rk[ 6]; rk[ 7] = rk[ 1] ^ rk[ 6];
rk[ 8] = rk[ 2] ^ rk[ 7]; rk[ 8] = rk[ 2] ^ rk[ 7];
rk[ 9] = rk[ 3] ^ rk[ 8]; rk[ 9] = rk[ 3] ^ rk[ 8];
if (++i == 8) { if(++i == 8) {
return 12; return 12;
} }
rk[10] = rk[ 4] ^ rk[ 9]; rk[10] = rk[ 4] ^ rk[ 9];
@ -1024,8 +1044,8 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
} }
rk[6] = GETU32(cipherKey + 24); rk[6] = GETU32(cipherKey + 24);
rk[7] = GETU32(cipherKey + 28); rk[7] = GETU32(cipherKey + 28);
if (keyBits == 256) { if(keyBits == 256) {
for (;;) { for(;;) {
temp = rk[ 7]; temp = rk[ 7];
rk[ 8] = rk[ 0] ^ rk[ 8] = rk[ 0] ^
(Te4[b2(temp)] & 0xff000000) ^ (Te4[b2(temp)] & 0xff000000) ^
@ -1036,7 +1056,7 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
rk[ 9] = rk[ 1] ^ rk[ 8]; rk[ 9] = rk[ 1] ^ rk[ 8];
rk[10] = rk[ 2] ^ rk[ 9]; rk[10] = rk[ 2] ^ rk[ 9];
rk[11] = rk[ 3] ^ rk[10]; rk[11] = rk[ 3] ^ rk[10];
if (++i == 7) { if(++i == 7) {
return 14; return 14;
} }
temp = rk[11]; temp = rk[11];
@ -1048,22 +1068,19 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
rk[13] = rk[ 5] ^ rk[12]; rk[13] = rk[ 5] ^ rk[12];
rk[14] = rk[ 6] ^ rk[13]; rk[14] = rk[ 6] ^ rk[13];
rk[15] = rk[ 7] ^ rk[14]; rk[15] = rk[ 7] ^ rk[14];
rk += 8; rk += 8;
} }
} }
return 0; return 0;
} }
/**
* Expand the cipher key into the decryption key schedule.
*
* @return the number of rounds for the given cipher key size.
*/
#define INVMIXCOLRK(n) rk[n] = Td0[b0(Te4[b3(rk[n])])] ^ Td1[b0(Te4[b2(rk[n])])] ^ Td2[b0(Te4[b1(rk[n])])] ^ Td3[b0(Te4[b0(rk[n])])] #define INVMIXCOLRK(n) rk[n] = Td0[b0(Te4[b3(rk[n])])] ^ Td1[b0(Te4[b2(rk[n])])] ^ Td2[b0(Te4[b1(rk[n])])] ^ Td3[b0(Te4[b0(rk[n])])]
// expand the cipher key into the decryption key schedule and
// return the number of rounds for the given cipher key size
static int aes_internal_key_setup_dec (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { static int aes_internal_key_setup_dec (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) {
int Nr, i, j; int Nr, i, j;
@ -1072,14 +1089,15 @@ static int aes_internal_key_setup_dec (uint32_t rk[/*4*(Nr + 1)*/], const uint8_
// expand the cipher key // expand the cipher key
Nr = aes_internal_key_setup_enc(rk, cipherKey, keyBits); Nr = aes_internal_key_setup_enc(rk, cipherKey, keyBits);
// invert the order of the round keys // invert the order of the round keys
for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { for(i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
} }
// apply the inverse MixColumn transform to all round keys but the first and the last // apply the inverse MixColumn transform to all round keys but the first and the last
for (i = 1; i < Nr; i++) { for(i = 1; i < Nr; i++) {
rk += 4; rk += 4;
INVMIXCOLRK(0); INVMIXCOLRK(0);
INVMIXCOLRK(1); INVMIXCOLRK(1);
@ -1212,6 +1230,7 @@ int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t
#define fix_xor(target, source) *(uint32_t*)&(target)[0] = *(uint32_t*)&(target)[0] ^ *(uint32_t*)&(source)[0]; *(uint32_t*)&(target)[4] = *(uint32_t*)&(target)[4] ^ *(uint32_t*)&(source)[4]; \ #define fix_xor(target, source) *(uint32_t*)&(target)[0] = *(uint32_t*)&(target)[0] ^ *(uint32_t*)&(source)[0]; *(uint32_t*)&(target)[4] = *(uint32_t*)&(target)[4] ^ *(uint32_t*)&(source)[4]; \
*(uint32_t*)&(target)[8] = *(uint32_t*)&(target)[8] ^ *(uint32_t*)&(source)[8]; *(uint32_t*)&(target)[12] = *(uint32_t*)&(target)[12] ^ *(uint32_t*)&(source)[12]; *(uint32_t*)&(target)[8] = *(uint32_t*)&(target)[8] ^ *(uint32_t*)&(source)[8]; *(uint32_t*)&(target)[12] = *(uint32_t*)&(target)[12] ^ *(uint32_t*)&(source)[12];
int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
const unsigned char *iv, aes_context_t *ctx) { const unsigned char *iv, aes_context_t *ctx) {
@ -1227,6 +1246,7 @@ int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len,
aes_internal_encrypt(ctx->enc_rk, ctx->Nr, tmp, tmp); aes_internal_encrypt(ctx->enc_rk, ctx->Nr, tmp, tmp);
memcpy(&out[i * AES_BLOCK_SIZE], tmp, AES_BLOCK_SIZE); memcpy(&out[i * AES_BLOCK_SIZE], tmp, AES_BLOCK_SIZE);
} }
return n * AES_BLOCK_SIZE; return n * AES_BLOCK_SIZE;
} }
@ -1252,13 +1272,14 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len,
return n * AES_BLOCK_SIZE; return n * AES_BLOCK_SIZE;
} }
int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
// allocate context... // allocate context...
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t)); *ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
if (!(*ctx)) if(!(*ctx))
return -1; return -1;
// ...and fill her up // ...and fill her up:
// initialize data structures // initialize data structures
@ -1282,136 +1303,12 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
} }
#endif // openSSL 1.1, AES-NI, plain C ---------------------------------------------------- #endif // openSSL 1.1, AES-NI, plain C ----------------------------------------------------------------------------
int aes_deinit (aes_context_t *ctx) {
if (ctx) free (ctx);
return 0;
}
// --- for testing ------------------------------------------------------------------------
// --- remove when done ---
/* int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
// allocate context...
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t));
if (!(*ctx))
return -1;
// ...and fill her up
// initialize data structures
#ifdef HAVE_OPENSSL_1_1
if(!((*ctx)->enc_ctx = EVP_CIPHER_CTX_new())) {
traceEvent(TRACE_ERROR, "aes_init openssl's evp_* encryption context creation failed: %s",
openssl_err_as_string());
return(-1);
}
if(!((*ctx)->dec_ctx = EVP_CIPHER_CTX_new())) {
traceEvent(TRACE_ERROR, "aes_init openssl's evp_* decryption context creation failed: %s",
openssl_err_as_string());
return(-1);
}
#endif
// check key size and make key size (given in bytes) dependent settings int aes_deinit (aes_context_t *ctx) {
switch(key_size) {
case AES128_KEY_BYTES: // 128 bit key size
#ifdef HAVE_OPENSSL_1_1
(*ctx)->cipher = EVP_aes_128_cbc();
#endif
break;
case AES192_KEY_BYTES: // 192 bit key size
#ifdef HAVE_OPENSSL_1_1
(*ctx)->cipher = EVP_aes_192_cbc();
#endif
break;
case AES256_KEY_BYTES: // 256 bit key size
#ifdef HAVE_OPENSSL_1_1
(*ctx)->cipher = EVP_aes_256_cbc();
#endif
break;
default:
traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size);
return -1;
}
// key material handling if(ctx) free(ctx);
#ifdef HAVE_OPENSSL_1_1
memcpy((*ctx)->key, key, key_size);
AES_set_decrypt_key(key, key_size * 8, &((*ctx)->ecb_dec_key));
#else
AES_set_encrypt_key(key, key_size * 8, &((*ctx)->enc_key));
AES_set_decrypt_key(key, key_size * 8, &((*ctx)->dec_key));
#endif
return 0; return 0;
} }
#ifdef TEST_AES
int main () {
aes_context_t *ctx;
// *ctx = malloc(sizeof(aes_context_t));
// uint8_t key[32] = {0};
// 128 bit key 0 --> 0336763e966d92595a567cc9ce537f5e
// uint8_t pt[16] = {0xf3, 0x44, 0x81, 0xec, 0x3c, 0xc6, 0x27, 0xba,
// 0xcd, 0x5d, 0xc3, 0xfb, 0x08, 0xf2, 0x73, 0xe6 };
// 256 bit key 0 --> 5c9d844ed46f9885085e5d6a4f94c7d7
// uint8_t pt[16] = {0x01, 0x47, 0x30, 0xf8, 0x0a, 0xc6, 0x25, 0xfe,
// 0x84, 0xf0, 0x26, 0xc6, 0x0b, 0xfd, 0x54, 0x7d };
uint8_t pt[16] = {0};
// 0 pt --> 6d251e6944b051e04eaa6fb4dbf78465
uint8_t key[16] = {0x10, 0xa5, 0x88, 0x69, 0xd7, 0x4b, 0xe5, 0xa3,
0x74, 0xcf, 0x86, 0x7c, 0xfb, 0x47, 0x38, 0x59 };
uint8_t ct[16] = {0};
int i;
// aes_internal_key_setup (ctx, key, 8 * sizeof(key));
aes_init (key, sizeof(key), &ctx);
printf ("Nr = %u\n",(ctx)->Nr);
memset (pt, 0, 16);
for(i = 0; i < 16; i++)
printf ("%02x",pt[i]);
printf ("--- pt\n");
aes_internal_encrypt((ctx), pt, ct);
memset (pt, 4, 16);
for(i = 0; i < 16; i++)
printf ("%02x",ct[i]);
printf ("--- ct\n");
printf ("Nr = %u\n",(ctx)->Nr);
printf ("Nr = %u\n",(ctx)->Nr);
aes_internal_decrypt((ctx), ct, pt);
memset (ct, 9, 16);
for(i = 0; i < 16; i++)
printf ("%02x",pt[i]);
printf ("--- pt\n");
aes_internal_encrypt((ctx), pt, ct);
for(i = 0; i < 16; i++)
printf ("%02x",ct[i]);
printf ("--- ct\n");
}
#endif
*/

Loading…
Cancel
Save