|
|
@ -20,7 +20,7 @@ |
|
|
|
#include "n2n.h" |
|
|
|
|
|
|
|
|
|
|
|
#if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------
|
|
|
|
#if defined (HAVE_OPENSSL_1_1) // openSSL 1.1 ---------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
// get any error message out of openssl
|
|
|
@ -37,6 +37,7 @@ static char *openssl_err_as_string (void) { |
|
|
|
memcpy (ret, buf, len); |
|
|
|
|
|
|
|
BIO_free (bio); |
|
|
|
|
|
|
|
return ret; |
|
|
|
} |
|
|
|
|
|
|
@ -121,9 +122,10 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
|
|
|
|
// allocate context...
|
|
|
|
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t)); |
|
|
|
if (!(*ctx)) |
|
|
|
if(!(*ctx)) |
|
|
|
return -1; |
|
|
|
// ...and fill her up
|
|
|
|
|
|
|
|
// ...and fill her up:
|
|
|
|
|
|
|
|
// initialize data structures
|
|
|
|
if(!((*ctx)->enc_ctx = EVP_CIPHER_CTX_new())) { |
|
|
@ -131,6 +133,7 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
openssl_err_as_string()); |
|
|
|
return -1; |
|
|
|
} |
|
|
|
|
|
|
|
if(!((*ctx)->dec_ctx = EVP_CIPHER_CTX_new())) { |
|
|
|
traceEvent(TRACE_ERROR, "aes_init openssl's evp_* decryption context creation failed: %s", |
|
|
|
openssl_err_as_string()); |
|
|
@ -161,7 +164,7 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI ---------------------------
|
|
|
|
#elif defined (__AES__) && defined (__SSE2__) // Intel's AES-NI ---------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
// inspired by https://gist.github.com/acapola/d5b940da024080dfaf5f
|
|
|
@ -172,11 +175,15 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
|
|
|
|
|
|
|
|
static __m128i aes128_keyexpand(__m128i key, __m128i keygened, uint8_t shuf) { |
|
|
|
|
|
|
|
key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); |
|
|
|
key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); |
|
|
|
key = _mm_xor_si128(key, _mm_slli_si128(key, 4)); |
|
|
|
// unfortunately, shuffle expects immediate argument ... macrorize???!!!
|
|
|
|
switch (shuf) { |
|
|
|
|
|
|
|
// unfortunately, shuffle expects immediate argument, thus the not-so-stylish switch ...
|
|
|
|
// REVISIT: either macrorize this whole function (and perhaps the following one) or
|
|
|
|
// use shuffle_epi8 (which would require SSSE3 instead of SSE2)
|
|
|
|
switch(shuf) { |
|
|
|
case 0x55: |
|
|
|
keygened = _mm_shuffle_epi32(keygened, 0x55 ); |
|
|
|
break; |
|
|
@ -189,23 +196,25 @@ static __m128i aes128_keyexpand(__m128i key, __m128i keygened, uint8_t shuf) { |
|
|
|
default: |
|
|
|
break; |
|
|
|
} |
|
|
|
|
|
|
|
return _mm_xor_si128(key, keygened); |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// helper behind KEYEXP192_2, used while deriving the 192 bit key schedule:
// broadcasts the highest 32-bit lane (index 3) of 'key' to all four lanes and
// XORs it onto 'key2' combined with a copy of 'key2' shifted left by 4 bytes
static __m128i aes192_keyexpand_2(__m128i key, __m128i key2) {

    // selector 0xff picks lane 3 for every output lane
    key = _mm_shuffle_epi32(key, 0xff);
    // lane n of key2 becomes key2[n] ^ key2[n-1] (lane 0 remains unchanged)
    key2 = _mm_xor_si128(key2, _mm_slli_si128(key2, 4));

    return _mm_xor_si128(key, key2);
}
|
|
|
|
|
|
|
|
|
|
|
// key expansion shorthands used by the AES-NI key setup; 'I' is the round constant
// passed to _mm_aeskeygenassist_si128, the last argument is the shuffle selector
// handed on to aes128_keyexpand
#define KEYEXP128(K, I)      aes128_keyexpand (K, _mm_aeskeygenassist_si128(K, I), 0xff)
#define KEYEXP192(K1, K2, I) aes128_keyexpand (K1, _mm_aeskeygenassist_si128(K2, I), 0x55)
#define KEYEXP192_2(K1, K2)  aes192_keyexpand_2(K1, K2)
#define KEYEXP256(K1, K2, I) aes128_keyexpand (K1, _mm_aeskeygenassist_si128(K2, I), 0xff)
#define KEYEXP256_2(K1, K2)  aes128_keyexpand (K1, _mm_aeskeygenassist_si128(K2, 0x00), 0xaa)
|
|
|
|
|
|
|
|
|
|
|
// key setup
|
|
|
@ -215,61 +224,69 @@ static int aes_internal_key_setup (aes_context_t *ctx, const uint8_t *key, int k |
|
|
|
ctx->Nr = 6 + (key_bits / 32); |
|
|
|
|
|
|
|
// encryption keys
|
|
|
|
switch (key_bits) { |
|
|
|
switch(key_bits) { |
|
|
|
case 128: { |
|
|
|
ctx->rk_enc[0] = _mm_loadu_si128((const __m128i*)key); |
|
|
|
ctx->rk_enc[1] = KEYEXP128(ctx->rk_enc[0], 0x01); |
|
|
|
ctx->rk_enc[2] = KEYEXP128(ctx->rk_enc[1], 0x02); |
|
|
|
ctx->rk_enc[3] = KEYEXP128(ctx->rk_enc[2], 0x04); |
|
|
|
ctx->rk_enc[4] = KEYEXP128(ctx->rk_enc[3], 0x08); |
|
|
|
ctx->rk_enc[5] = KEYEXP128(ctx->rk_enc[4], 0x10); |
|
|
|
ctx->rk_enc[6] = KEYEXP128(ctx->rk_enc[5], 0x20); |
|
|
|
ctx->rk_enc[7] = KEYEXP128(ctx->rk_enc[6], 0x40); |
|
|
|
ctx->rk_enc[8] = KEYEXP128(ctx->rk_enc[7], 0x80); |
|
|
|
ctx->rk_enc[9] = KEYEXP128(ctx->rk_enc[8], 0x1B); |
|
|
|
ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*)key); |
|
|
|
ctx->rk_enc[ 1] = KEYEXP128(ctx->rk_enc[0], 0x01); |
|
|
|
ctx->rk_enc[ 2] = KEYEXP128(ctx->rk_enc[1], 0x02); |
|
|
|
ctx->rk_enc[ 3] = KEYEXP128(ctx->rk_enc[2], 0x04); |
|
|
|
ctx->rk_enc[ 4] = KEYEXP128(ctx->rk_enc[3], 0x08); |
|
|
|
ctx->rk_enc[ 5] = KEYEXP128(ctx->rk_enc[4], 0x10); |
|
|
|
ctx->rk_enc[ 6] = KEYEXP128(ctx->rk_enc[5], 0x20); |
|
|
|
ctx->rk_enc[ 7] = KEYEXP128(ctx->rk_enc[6], 0x40); |
|
|
|
ctx->rk_enc[ 8] = KEYEXP128(ctx->rk_enc[7], 0x80); |
|
|
|
ctx->rk_enc[ 9] = KEYEXP128(ctx->rk_enc[8], 0x1B); |
|
|
|
ctx->rk_enc[10] = KEYEXP128(ctx->rk_enc[9], 0x36); |
|
|
|
break; |
|
|
|
} |
|
|
|
case 192: { |
|
|
|
__m128i temp[2]; |
|
|
|
ctx->rk_enc[0] = _mm_loadu_si128((const __m128i*) key); |
|
|
|
ctx->rk_enc[1] = _mm_loadu_si128((const __m128i*) (key+16)); |
|
|
|
ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key); |
|
|
|
|
|
|
|
ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16)); |
|
|
|
temp[0] = KEYEXP192(ctx->rk_enc[0], ctx->rk_enc[1], 0x01); |
|
|
|
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[1]); |
|
|
|
ctx->rk_enc[1] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[1], (__m128d)temp[0], 0); |
|
|
|
ctx->rk_enc[2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); |
|
|
|
ctx->rk_enc[3] = KEYEXP192(temp[0], temp[1], 0x02); |
|
|
|
ctx->rk_enc[4] = KEYEXP192_2(ctx->rk_enc[3], temp[1]); |
|
|
|
ctx->rk_enc[ 1] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[1], (__m128d)temp[0], 0); |
|
|
|
|
|
|
|
ctx->rk_enc[ 2] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); |
|
|
|
ctx->rk_enc[ 3] = KEYEXP192(temp[0], temp[1], 0x02); |
|
|
|
|
|
|
|
ctx->rk_enc[ 4] = KEYEXP192_2(ctx->rk_enc[3], temp[1]); |
|
|
|
temp[0] = KEYEXP192(ctx->rk_enc[3], ctx->rk_enc[4], 0x04); |
|
|
|
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[4]); |
|
|
|
ctx->rk_enc[4] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[4], (__m128d)temp[0], 0); |
|
|
|
ctx->rk_enc[5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); |
|
|
|
ctx->rk_enc[6] = KEYEXP192(temp[0], temp[1], 0x08); |
|
|
|
ctx->rk_enc[7] = KEYEXP192_2(ctx->rk_enc[6], temp[1]); |
|
|
|
ctx->rk_enc[ 4] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[4], (__m128d)temp[0], 0); |
|
|
|
|
|
|
|
ctx->rk_enc[ 5] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); |
|
|
|
ctx->rk_enc[ 6] = KEYEXP192(temp[0], temp[1], 0x08); |
|
|
|
|
|
|
|
ctx->rk_enc[ 7] = KEYEXP192_2(ctx->rk_enc[6], temp[1]); |
|
|
|
temp[0] = KEYEXP192(ctx->rk_enc[6], ctx->rk_enc[7], 0x10); |
|
|
|
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[7]); |
|
|
|
ctx->rk_enc[7] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[7], (__m128d)temp[0], 0); |
|
|
|
ctx->rk_enc[8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); |
|
|
|
ctx->rk_enc[9] = KEYEXP192(temp[0], temp[1], 0x20); |
|
|
|
ctx->rk_enc[ 7] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[7], (__m128d)temp[0], 0); |
|
|
|
|
|
|
|
ctx->rk_enc[ 8] = (__m128i)_mm_shuffle_pd((__m128d)temp[0], (__m128d)temp[1], 1); |
|
|
|
ctx->rk_enc[ 9] = KEYEXP192(temp[0], temp[1], 0x20); |
|
|
|
|
|
|
|
ctx->rk_enc[10] = KEYEXP192_2(ctx->rk_enc[9], temp[1]); |
|
|
|
temp[0] = KEYEXP192(ctx->rk_enc[9], ctx->rk_enc[10], 0x40); |
|
|
|
temp[1] = KEYEXP192_2(temp[0], ctx->rk_enc[10]); |
|
|
|
ctx->rk_enc[10] = (__m128i)_mm_shuffle_pd((__m128d)ctx->rk_enc[10], (__m128d) temp[0], 0); |
|
|
|
|
|
|
|
ctx->rk_enc[11] = (__m128i)_mm_shuffle_pd((__m128d)temp[0],(__m128d) temp[1], 1); |
|
|
|
ctx->rk_enc[12] = KEYEXP192(temp[0], temp[1], 0x80); |
|
|
|
break; |
|
|
|
} |
|
|
|
case 256: { |
|
|
|
ctx->rk_enc[0] = _mm_loadu_si128((const __m128i*) key); |
|
|
|
ctx->rk_enc[1] = _mm_loadu_si128((const __m128i*) (key+16)); |
|
|
|
ctx->rk_enc[2] = KEYEXP256(ctx->rk_enc[0], ctx->rk_enc[1], 0x01); |
|
|
|
ctx->rk_enc[3] = KEYEXP256_2(ctx->rk_enc[1], ctx->rk_enc[2]); |
|
|
|
ctx->rk_enc[4] = KEYEXP256(ctx->rk_enc[2], ctx->rk_enc[3], 0x02); |
|
|
|
ctx->rk_enc[5] = KEYEXP256_2(ctx->rk_enc[3], ctx->rk_enc[4]); |
|
|
|
ctx->rk_enc[6] = KEYEXP256(ctx->rk_enc[4], ctx->rk_enc[5], 0x04); |
|
|
|
ctx->rk_enc[7] = KEYEXP256_2(ctx->rk_enc[5], ctx->rk_enc[6]); |
|
|
|
ctx->rk_enc[8] = KEYEXP256(ctx->rk_enc[6], ctx->rk_enc[7], 0x08); |
|
|
|
ctx->rk_enc[9] = KEYEXP256_2(ctx->rk_enc[7], ctx->rk_enc[8]); |
|
|
|
ctx->rk_enc[ 0] = _mm_loadu_si128((const __m128i*) key); |
|
|
|
ctx->rk_enc[ 1] = _mm_loadu_si128((const __m128i*) (key+16)); |
|
|
|
ctx->rk_enc[ 2] = KEYEXP256(ctx->rk_enc[0], ctx->rk_enc[1], 0x01); |
|
|
|
ctx->rk_enc[ 3] = KEYEXP256_2(ctx->rk_enc[1], ctx->rk_enc[2]); |
|
|
|
ctx->rk_enc[ 4] = KEYEXP256(ctx->rk_enc[2], ctx->rk_enc[3], 0x02); |
|
|
|
ctx->rk_enc[ 5] = KEYEXP256_2(ctx->rk_enc[3], ctx->rk_enc[4]); |
|
|
|
ctx->rk_enc[ 6] = KEYEXP256(ctx->rk_enc[4], ctx->rk_enc[5], 0x04); |
|
|
|
ctx->rk_enc[ 7] = KEYEXP256_2(ctx->rk_enc[5], ctx->rk_enc[6]); |
|
|
|
ctx->rk_enc[ 8] = KEYEXP256(ctx->rk_enc[6], ctx->rk_enc[7], 0x08); |
|
|
|
ctx->rk_enc[ 9] = KEYEXP256_2(ctx->rk_enc[7], ctx->rk_enc[8]); |
|
|
|
ctx->rk_enc[10] = KEYEXP256(ctx->rk_enc[8], ctx->rk_enc[9], 0x10); |
|
|
|
ctx->rk_enc[11] = KEYEXP256_2(ctx->rk_enc[9], ctx->rk_enc[10]); |
|
|
|
ctx->rk_enc[12] = KEYEXP256(ctx->rk_enc[10], ctx->rk_enc[11], 0x20); |
|
|
@ -280,10 +297,10 @@ static int aes_internal_key_setup (aes_context_t *ctx, const uint8_t *key, int k |
|
|
|
} |
|
|
|
|
|
|
|
// derive decryption keys
|
|
|
|
for (int i = 1; i < ctx->Nr; ++i) { |
|
|
|
for(int i = 1; i < ctx->Nr; ++i) { |
|
|
|
ctx->rk_dec[ctx->Nr - i] = _mm_aesimc_si128(ctx->rk_enc[i]); |
|
|
|
} |
|
|
|
ctx->rk_dec[0] = ctx->rk_enc[ctx->Nr]; |
|
|
|
ctx->rk_dec[ 0] = ctx->rk_enc[ctx->Nr]; |
|
|
|
|
|
|
|
return ctx->Nr; |
|
|
|
} |
|
|
@ -311,7 +328,7 @@ static void aes_internal_encrypt (aes_context_t *ctx, const uint8_t pt[16], uint |
|
|
|
tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]); |
|
|
|
} |
|
|
|
} |
|
|
|
tmp = _mm_aesenclast_si128(tmp, ctx->rk_enc[ctx->Nr]); |
|
|
|
tmp = _mm_aesenclast_si128 (tmp, ctx->rk_enc[ctx->Nr]); |
|
|
|
|
|
|
|
_mm_storeu_si128((__m128i*) ct, tmp); |
|
|
|
} |
|
|
@ -339,7 +356,7 @@ static void aes_internal_decrypt (aes_context_t *ctx, const uint8_t ct[16], uint |
|
|
|
tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]); |
|
|
|
} |
|
|
|
} |
|
|
|
tmp = _mm_aesdeclast_si128(tmp, ctx->rk_enc[0]); |
|
|
|
tmp = _mm_aesdeclast_si128 (tmp, ctx->rk_enc[ 0]); |
|
|
|
|
|
|
|
_mm_storeu_si128((__m128i*) pt, tmp); |
|
|
|
} |
|
|
@ -368,8 +385,8 @@ int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t |
|
|
|
int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
const unsigned char *iv, aes_context_t *ctx) { |
|
|
|
|
|
|
|
int n; // number of blocks
|
|
|
|
int ret = (int)in_len & 15; // remainder
|
|
|
|
int n; /* number of blocks */ |
|
|
|
int ret = (int)in_len & 15; /* remainder */ |
|
|
|
|
|
|
|
__m128i ivec = _mm_loadu_si128((__m128i*)iv); |
|
|
|
|
|
|
@ -396,7 +413,7 @@ int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
tmp = _mm_aesenc_si128(tmp, ctx->rk_enc[13]); |
|
|
|
} |
|
|
|
} |
|
|
|
tmp = _mm_aesenclast_si128(tmp, ctx->rk_enc[ctx->Nr]); |
|
|
|
tmp = _mm_aesenclast_si128 (tmp, ctx->rk_enc[ctx->Nr]); |
|
|
|
|
|
|
|
ivec = tmp; |
|
|
|
|
|
|
@ -411,11 +428,12 @@ int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
const unsigned char *iv, aes_context_t *ctx) { |
|
|
|
|
|
|
|
int n; // number of blocks
|
|
|
|
int ret = (int)in_len & 15; // remainder
|
|
|
|
int n; /* number of blocks */ |
|
|
|
int ret = (int)in_len & 15; /* remainder */ |
|
|
|
|
|
|
|
__m128i ivec = _mm_loadu_si128((__m128i*)iv); |
|
|
|
|
|
|
|
// 4 parallel rails of AES decryption to reduce data dependencies in x86's deep pipelines
|
|
|
|
for(n = in_len / 16; n > 3; n -=4) { |
|
|
|
__m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16; |
|
|
|
__m128i tmp2 = _mm_loadu_si128((__m128i*)in); in += 16; |
|
|
@ -484,9 +502,11 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
_mm_storeu_si128((__m128i*) out, tmp2); out += 16; |
|
|
|
_mm_storeu_si128((__m128i*) out, tmp3); out += 16; |
|
|
|
_mm_storeu_si128((__m128i*) out, tmp4); out += 16; |
|
|
|
} // now: less than 4 blocks remaining
|
|
|
|
} |
|
|
|
// now: less than 4 blocks remaining
|
|
|
|
|
|
|
|
if(n > 1) { // 2 or 3 blocks remaining --> this code handles two of them
|
|
|
|
// if 2 or 3 blocks remaining --> this code handles two of them
|
|
|
|
if(n > 1) { |
|
|
|
n-= 2; |
|
|
|
|
|
|
|
__m128i tmp1 = _mm_loadu_si128((__m128i*)in); in += 16; |
|
|
@ -513,7 +533,7 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
tmp1 = _mm_aesdec_si128(tmp1, ctx->rk_dec[13]); tmp2 = _mm_aesdec_si128 (tmp2, ctx->rk_dec[13]); |
|
|
|
} |
|
|
|
} |
|
|
|
tmp1 = _mm_aesdeclast_si128(tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]); |
|
|
|
tmp1 = _mm_aesdeclast_si128 (tmp1, ctx->rk_enc[ 0]); tmp2 = _mm_aesdeclast_si128(tmp2, ctx->rk_enc[ 0]); |
|
|
|
|
|
|
|
tmp1 = _mm_xor_si128 (tmp1, ivec); tmp2 = _mm_xor_si128 (tmp2, old_in1); |
|
|
|
|
|
|
@ -523,7 +543,8 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
_mm_storeu_si128((__m128i*) out, tmp2); out += 16; |
|
|
|
} |
|
|
|
|
|
|
|
if(n) { // one block remaining
|
|
|
|
// one block remaining
|
|
|
|
if(n) { |
|
|
|
__m128i tmp = _mm_loadu_si128((__m128i*)in); |
|
|
|
__m128i old_in = tmp; |
|
|
|
|
|
|
@ -545,7 +566,7 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
tmp = _mm_aesdec_si128(tmp, ctx->rk_dec[13]); |
|
|
|
} |
|
|
|
} |
|
|
|
tmp = _mm_aesdeclast_si128(tmp, ctx->rk_enc[ 0]); |
|
|
|
tmp = _mm_aesdeclast_si128 (tmp, ctx->rk_enc[ 0]); |
|
|
|
|
|
|
|
tmp = _mm_xor_si128 (tmp, ivec); |
|
|
|
|
|
|
@ -560,9 +581,9 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
|
|
|
|
// allocate context...
|
|
|
|
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t)); |
|
|
|
if (!(*ctx)) |
|
|
|
if(!(*ctx)) |
|
|
|
return -1; |
|
|
|
// ...and fill her up
|
|
|
|
// ...and fill her up:
|
|
|
|
|
|
|
|
// initialize data structures
|
|
|
|
|
|
|
@ -581,12 +602,14 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
|
|
|
|
// key material handling
|
|
|
|
aes_internal_key_setup ( *ctx, key, 8 * key_size); |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#else // plain C --------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
// rijndael-alg-fst.c version 3.0 (December 2000)
|
|
|
|
// optimised ANSI C code for the Rijndael cipher (now AES)
|
|
|
|
// original authors: Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
|
|
|
@ -968,11 +991,8 @@ static const uint32_t rcon[] = { |
|
|
|
#define m3(x) ((x) & 0xff000000) |
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Expand the cipher key into the encryption key schedule. |
|
|
|
* |
|
|
|
* @return the number of rounds for the given cipher key size. |
|
|
|
*/ |
|
|
|
// expand the cipher key into the encryption key schedule and
|
|
|
|
// return the number of rounds for the given cipher key size
|
|
|
|
static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { |
|
|
|
|
|
|
|
int i = 0; |
|
|
@ -982,8 +1002,8 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
rk[1] = GETU32(cipherKey + 4); |
|
|
|
rk[2] = GETU32(cipherKey + 8); |
|
|
|
rk[3] = GETU32(cipherKey + 12); |
|
|
|
if (keyBits == 128) { |
|
|
|
for (;;) { |
|
|
|
if(keyBits == 128) { |
|
|
|
for(;;) { |
|
|
|
temp = rk[3]; |
|
|
|
rk[4] = rk[0] ^ |
|
|
|
(Te4[b2(temp)] & 0xff000000) ^ |
|
|
@ -994,7 +1014,7 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
rk[5] = rk[1] ^ rk[4]; |
|
|
|
rk[6] = rk[2] ^ rk[5]; |
|
|
|
rk[7] = rk[3] ^ rk[6]; |
|
|
|
if (++i == 10) { |
|
|
|
if(++i == 10) { |
|
|
|
return 10; |
|
|
|
} |
|
|
|
rk += 4; |
|
|
@ -1002,8 +1022,8 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
} |
|
|
|
rk[4] = GETU32(cipherKey + 16); |
|
|
|
rk[5] = GETU32(cipherKey + 20); |
|
|
|
if (keyBits == 192) { |
|
|
|
for (;;) { |
|
|
|
if(keyBits == 192) { |
|
|
|
for(;;) { |
|
|
|
temp = rk[ 5]; |
|
|
|
rk[ 6] = rk[ 0] ^ |
|
|
|
(Te4[b2(temp)] & 0xff000000) ^ |
|
|
@ -1014,7 +1034,7 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
rk[ 7] = rk[ 1] ^ rk[ 6]; |
|
|
|
rk[ 8] = rk[ 2] ^ rk[ 7]; |
|
|
|
rk[ 9] = rk[ 3] ^ rk[ 8]; |
|
|
|
if (++i == 8) { |
|
|
|
if(++i == 8) { |
|
|
|
return 12; |
|
|
|
} |
|
|
|
rk[10] = rk[ 4] ^ rk[ 9]; |
|
|
@ -1024,8 +1044,8 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
} |
|
|
|
rk[6] = GETU32(cipherKey + 24); |
|
|
|
rk[7] = GETU32(cipherKey + 28); |
|
|
|
if (keyBits == 256) { |
|
|
|
for (;;) { |
|
|
|
if(keyBits == 256) { |
|
|
|
for(;;) { |
|
|
|
temp = rk[ 7]; |
|
|
|
rk[ 8] = rk[ 0] ^ |
|
|
|
(Te4[b2(temp)] & 0xff000000) ^ |
|
|
@ -1036,7 +1056,7 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
rk[ 9] = rk[ 1] ^ rk[ 8]; |
|
|
|
rk[10] = rk[ 2] ^ rk[ 9]; |
|
|
|
rk[11] = rk[ 3] ^ rk[10]; |
|
|
|
if (++i == 7) { |
|
|
|
if(++i == 7) { |
|
|
|
return 14; |
|
|
|
} |
|
|
|
temp = rk[11]; |
|
|
@ -1048,22 +1068,19 @@ static int aes_internal_key_setup_enc (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
rk[13] = rk[ 5] ^ rk[12]; |
|
|
|
rk[14] = rk[ 6] ^ rk[13]; |
|
|
|
rk[15] = rk[ 7] ^ rk[14]; |
|
|
|
|
|
|
|
rk += 8; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
/**
|
|
|
|
* Expand the cipher key into the decryption key schedule. |
|
|
|
* |
|
|
|
* @return the number of rounds for the given cipher key size. |
|
|
|
*/ |
|
|
|
|
|
|
|
#define INVMIXCOLRK(n) rk[n] = Td0[b0(Te4[b3(rk[n])])] ^ Td1[b0(Te4[b2(rk[n])])] ^ Td2[b0(Te4[b1(rk[n])])] ^ Td3[b0(Te4[b0(rk[n])])] |
|
|
|
|
|
|
|
|
|
|
|
// expand the cipher key into the decryption key schedule and
|
|
|
|
// return the number of rounds for the given cipher key size
|
|
|
|
static int aes_internal_key_setup_dec (uint32_t rk[/*4*(Nr + 1)*/], const uint8_t cipherKey[], int keyBits) { |
|
|
|
|
|
|
|
int Nr, i, j; |
|
|
@ -1072,14 +1089,15 @@ static int aes_internal_key_setup_dec (uint32_t rk[/*4*(Nr + 1)*/], const uint8_ |
|
|
|
// expand the cipher key
|
|
|
|
Nr = aes_internal_key_setup_enc(rk, cipherKey, keyBits); |
|
|
|
// invert the order of the round keys
|
|
|
|
for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { |
|
|
|
for(i = 0, j = 4*Nr; i < j; i += 4, j -= 4) { |
|
|
|
temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp; |
|
|
|
temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp; |
|
|
|
temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; |
|
|
|
temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; |
|
|
|
} |
|
|
|
|
|
|
|
// apply the inverse MixColumn transform to all round keys but the first and the last
|
|
|
|
for (i = 1; i < Nr; i++) { |
|
|
|
for(i = 1; i < Nr; i++) { |
|
|
|
rk += 4; |
|
|
|
INVMIXCOLRK(0); |
|
|
|
INVMIXCOLRK(1); |
|
|
@ -1212,6 +1230,7 @@ int aes_ecb_encrypt (unsigned char *out, const unsigned char *in, aes_context_t |
|
|
|
#define fix_xor(target, source) *(uint32_t*)&(target)[0] = *(uint32_t*)&(target)[0] ^ *(uint32_t*)&(source)[0]; *(uint32_t*)&(target)[4] = *(uint32_t*)&(target)[4] ^ *(uint32_t*)&(source)[4]; \ |
|
|
|
*(uint32_t*)&(target)[8] = *(uint32_t*)&(target)[8] ^ *(uint32_t*)&(source)[8]; *(uint32_t*)&(target)[12] = *(uint32_t*)&(target)[12] ^ *(uint32_t*)&(source)[12]; |
|
|
|
|
|
|
|
|
|
|
|
int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
const unsigned char *iv, aes_context_t *ctx) { |
|
|
|
|
|
|
@ -1227,6 +1246,7 @@ int aes_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
aes_internal_encrypt(ctx->enc_rk, ctx->Nr, tmp, tmp); |
|
|
|
memcpy(&out[i * AES_BLOCK_SIZE], tmp, AES_BLOCK_SIZE); |
|
|
|
} |
|
|
|
|
|
|
|
return n * AES_BLOCK_SIZE; |
|
|
|
} |
|
|
|
|
|
|
@ -1252,13 +1272,14 @@ int aes_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
|
return n * AES_BLOCK_SIZE; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
|
|
|
|
// allocate context...
|
|
|
|
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t)); |
|
|
|
if (!(*ctx)) |
|
|
|
if(!(*ctx)) |
|
|
|
return -1; |
|
|
|
// ...and fill her up
|
|
|
|
// ...and fill her up:
|
|
|
|
|
|
|
|
// initialize data structures
|
|
|
|
|
|
|
@ -1282,136 +1303,12 @@ int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) { |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#endif // openSSL 1.1, AES-NI, plain C ----------------------------------------------------
|
|
|
|
|
|
|
|
// release an AES context that was allocated by aes_init;
// a NULL context is accepted, the function always returns 0
int aes_deinit (aes_context_t *ctx) {

#if defined (HAVE_OPENSSL_1_1)
    // the openSSL flavor of aes_init creates two EVP cipher contexts inside the
    // structure; release them first, otherwise they leak with every deinit
    // (EVP_CIPHER_CTX_free accepts NULL, so a partially initialized context is fine)
    if(ctx) {
        EVP_CIPHER_CTX_free(ctx->enc_ctx);
        EVP_CIPHER_CTX_free(ctx->dec_ctx);
    }
#endif

    // free(NULL) is a no-op, no guard required
    free(ctx);

    return 0;
}
|
|
|
|
|
|
|
|
|
|
|
// --- for testing ------------------------------------------------------------------------
|
|
|
|
// --- remove when done ---
|
|
|
|
|
|
|
|
/* int aes_init (const unsigned char *key, size_t key_size, aes_context_t **ctx) {
|
|
|
|
|
|
|
|
// allocate context...
|
|
|
|
*ctx = (aes_context_t*) calloc(1, sizeof(aes_context_t)); |
|
|
|
if (!(*ctx)) |
|
|
|
return -1; |
|
|
|
// ...and fill her up
|
|
|
|
#endif // openSSL 1.1, AES-NI, plain C ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// initialize data structures
|
|
|
|
#ifdef HAVE_OPENSSL_1_1 |
|
|
|
if(!((*ctx)->enc_ctx = EVP_CIPHER_CTX_new())) { |
|
|
|
traceEvent(TRACE_ERROR, "aes_init openssl's evp_* encryption context creation failed: %s", |
|
|
|
openssl_err_as_string()); |
|
|
|
return(-1); |
|
|
|
} |
|
|
|
if(!((*ctx)->dec_ctx = EVP_CIPHER_CTX_new())) { |
|
|
|
traceEvent(TRACE_ERROR, "aes_init openssl's evp_* decryption context creation failed: %s", |
|
|
|
openssl_err_as_string()); |
|
|
|
return(-1); |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
// check key size and make key size (given in bytes) dependent settings
|
|
|
|
switch(key_size) { |
|
|
|
case AES128_KEY_BYTES: // 128 bit key size
|
|
|
|
#ifdef HAVE_OPENSSL_1_1 |
|
|
|
(*ctx)->cipher = EVP_aes_128_cbc(); |
|
|
|
#endif |
|
|
|
break; |
|
|
|
case AES192_KEY_BYTES: // 192 bit key size
|
|
|
|
#ifdef HAVE_OPENSSL_1_1 |
|
|
|
(*ctx)->cipher = EVP_aes_192_cbc(); |
|
|
|
#endif |
|
|
|
break; |
|
|
|
case AES256_KEY_BYTES: // 256 bit key size
|
|
|
|
#ifdef HAVE_OPENSSL_1_1 |
|
|
|
(*ctx)->cipher = EVP_aes_256_cbc(); |
|
|
|
#endif |
|
|
|
break; |
|
|
|
default: |
|
|
|
traceEvent(TRACE_ERROR, "aes_init invalid key size %u\n", key_size); |
|
|
|
return -1; |
|
|
|
} |
|
|
|
int aes_deinit (aes_context_t *ctx) { |
|
|
|
|
|
|
|
// key material handling
|
|
|
|
#ifdef HAVE_OPENSSL_1_1 |
|
|
|
memcpy((*ctx)->key, key, key_size); |
|
|
|
AES_set_decrypt_key(key, key_size * 8, &((*ctx)->ecb_dec_key)); |
|
|
|
#else |
|
|
|
AES_set_encrypt_key(key, key_size * 8, &((*ctx)->enc_key)); |
|
|
|
AES_set_decrypt_key(key, key_size * 8, &((*ctx)->dec_key)); |
|
|
|
#endif |
|
|
|
if(ctx) free(ctx); |
|
|
|
|
|
|
|
return 0; |
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
#ifdef TEST_AES |
|
|
|
int main () { |
|
|
|
|
|
|
|
aes_context_t *ctx; |
|
|
|
|
|
|
|
|
|
|
|
// *ctx = malloc(sizeof(aes_context_t));
|
|
|
|
|
|
|
|
|
|
|
|
// uint8_t key[32] = {0};
|
|
|
|
// 128 bit key 0 --> 0336763e966d92595a567cc9ce537f5e
|
|
|
|
// uint8_t pt[16] = {0xf3, 0x44, 0x81, 0xec, 0x3c, 0xc6, 0x27, 0xba,
|
|
|
|
// 0xcd, 0x5d, 0xc3, 0xfb, 0x08, 0xf2, 0x73, 0xe6 };
|
|
|
|
|
|
|
|
// 256 bit key 0 --> 5c9d844ed46f9885085e5d6a4f94c7d7
|
|
|
|
// uint8_t pt[16] = {0x01, 0x47, 0x30, 0xf8, 0x0a, 0xc6, 0x25, 0xfe,
|
|
|
|
// 0x84, 0xf0, 0x26, 0xc6, 0x0b, 0xfd, 0x54, 0x7d };
|
|
|
|
|
|
|
|
uint8_t pt[16] = {0}; |
|
|
|
// 0 pt --> 6d251e6944b051e04eaa6fb4dbf78465
|
|
|
|
uint8_t key[16] = {0x10, 0xa5, 0x88, 0x69, 0xd7, 0x4b, 0xe5, 0xa3, |
|
|
|
0x74, 0xcf, 0x86, 0x7c, 0xfb, 0x47, 0x38, 0x59 }; |
|
|
|
|
|
|
|
uint8_t ct[16] = {0}; |
|
|
|
int i; |
|
|
|
|
|
|
|
// aes_internal_key_setup (ctx, key, 8 * sizeof(key));
|
|
|
|
aes_init (key, sizeof(key), &ctx); |
|
|
|
|
|
|
|
printf ("Nr = %u\n",(ctx)->Nr); |
|
|
|
memset (pt, 0, 16); |
|
|
|
|
|
|
|
|
|
|
|
for(i = 0; i < 16; i++) |
|
|
|
printf ("%02x",pt[i]); |
|
|
|
printf ("--- pt\n"); |
|
|
|
|
|
|
|
aes_internal_encrypt((ctx), pt, ct); |
|
|
|
memset (pt, 4, 16); |
|
|
|
|
|
|
|
for(i = 0; i < 16; i++) |
|
|
|
printf ("%02x",ct[i]); |
|
|
|
printf ("--- ct\n"); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
printf ("Nr = %u\n",(ctx)->Nr); |
|
|
|
printf ("Nr = %u\n",(ctx)->Nr); |
|
|
|
|
|
|
|
aes_internal_decrypt((ctx), ct, pt); |
|
|
|
memset (ct, 9, 16); |
|
|
|
for(i = 0; i < 16; i++) |
|
|
|
printf ("%02x",pt[i]); |
|
|
|
printf ("--- pt\n"); |
|
|
|
|
|
|
|
aes_internal_encrypt((ctx), pt, ct); |
|
|
|
|
|
|
|
for(i = 0; i < 16; i++) |
|
|
|
printf ("%02x",ct[i]); |
|
|
|
printf ("--- ct\n"); |
|
|
|
} |
|
|
|
#endif |
|
|
|
*/ |
|
|
|