Browse Source

SSE code cleanup

pull/404/head
Logan007 4 years ago
parent
commit
7f8d1ac5a2
  1. 47
      src/cc20.c

47
src/cc20.c

@ -128,10 +128,15 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len,
CC20_ODD_ROUND (A, B, C, D); \ CC20_ODD_ROUND (A, B, C, D); \
CC20_EVEN_ROUND(A, B, C, D) CC20_EVEN_ROUND(A, B, C, D)
/*
 * STOREXOR(O, I, X)
 *
 * Loads one 128-bit block from I, XORs it with the 128-bit value X, stores
 * the result at O, and then advances both I and O by 16 elements (16 bytes
 * for the byte pointers passed at the call sites in cc20_crypt).
 *
 * The unaligned load/store intrinsics are used, so neither pointer needs to
 * be 16-byte aligned.
 *
 * O and I must be modifiable pointer lvalues; each is evaluated more than
 * once, so do not pass expressions with side effects.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and stays correct under an unbraced if/else or loop.
 * The last line deliberately carries no continuation backslash, so the
 * definition cannot swallow whatever line follows it.
 */
#define STOREXOR(O,I,X) \
do { \
_mm_storeu_si128 ((__m128i*)(O), \
_mm_xor_si128 (_mm_loadu_si128((const __m128i*)(I)), (X))); \
(I) += 16; (O) += 16; \
} while (0)
int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len, int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len,
const unsigned char *iv, cc20_context_t *ctx) { const unsigned char *iv, cc20_context_t *ctx) {
__m128i a, b, c, d, k0, k1, k2, k3, k4, k5, k6, k7; __m128i a, b, c, d, k0, k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11;
uint8_t *keystream8 = (uint8_t*)ctx->keystream32; uint8_t *keystream8 = (uint8_t*)ctx->keystream32;
@ -162,31 +167,8 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len,
k0 = ADD(k0, a); k1 = ADD(k1, b); k2 = ADD(k2, c); k3 = ADD(k3, d); k0 = ADD(k0, a); k1 = ADD(k1, b); k2 = ADD(k2, c); k3 = ADD(k3, d);
k4 = ADD(k4, a); k5 = ADD(k5, b); k6 = ADD(k6, c); k7 = ADD(k7, d); k7 = ADD(k7, ONE); k4 = ADD(k4, a); k5 = ADD(k5, b); k6 = ADD(k6, c); k7 = ADD(k7, d); k7 = ADD(k7, ONE);
_mm_storeu_si128 ((__m128i*)out, STOREXOR(out, in, k0); STOREXOR(out, in, k1); STOREXOR(out, in, k2); STOREXOR(out, in, k3);
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k0)); STOREXOR(out, in, k4); STOREXOR(out, in, k5); STOREXOR(out, in, k6); STOREXOR(out, in, k7);
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k1));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k2));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k3));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k4));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k5));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k6));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k7));
in += 16; out += 16;
// increment counter, make sure it is and stays little endian in memory // increment counter, make sure it is and stays little endian in memory
d = ADD(d, TWO); d = ADD(d, TWO);
@ -212,18 +194,7 @@ int cc20_crypt (unsigned char *out, const unsigned char *in, size_t in_len,
k0 = ADD(k0, a); k1 = ADD(k1, b); k2 = ADD(k2, c); k3 = ADD(k3, d); k0 = ADD(k0, a); k1 = ADD(k1, b); k2 = ADD(k2, c); k3 = ADD(k3, d);
_mm_storeu_si128 ((__m128i*)out, STOREXOR(out, in, k0); STOREXOR(out, in, k1); STOREXOR(out, in, k2); STOREXOR(out, in, k3);
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k0));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k1));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k2));
in += 16; out += 16;
_mm_storeu_si128 ((__m128i*)out,
_mm_xor_si128 (_mm_loadu_si128((__m128i*)in), k3));
in += 16; out += 16;
// increment counter, make sure it is and stays little endian in memory // increment counter, make sure it is and stays little endian in memory
d = ADD(d, ONE); d = ADD(d, ONE);

Loading…
Cancel
Save