|
@ -22,29 +22,29 @@ |
|
|
// published on github/drewcsillag/twofish
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * The MIT License (MIT)
 *
 * Copyright (c) 2015 Andrew T. Csillag
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "tf.h" |
|
|
#include "tf.h" |
|
@ -123,6 +123,7 @@ const uint8_t multEF[] = { 0x00, 0xEF, 0xB7, 0x58, 0x07, 0xE8, 0xB0, 0x5F, 0x0E, |
|
|
0xA8, 0x47, 0x1F, 0xF0, 0xAF, 0x40, 0x18, 0xF7, 0xA6, 0x49, 0x11, 0xFE, 0xA1, 0x4E, 0x16, 0xF9, |
|
|
0xA8, 0x47, 0x1F, 0xF0, 0xAF, 0x40, 0x18, 0xF7, 0xA6, 0x49, 0x11, 0xFE, 0xA1, 0x4E, 0x16, 0xF9, |
|
|
0xB4, 0x5B, 0x03, 0xEC, 0xB3, 0x5C, 0x04, 0xEB, 0xBA, 0x55, 0x0D, 0xE2, 0xBD, 0x52, 0x0A, 0xE5 }; |
|
|
0xB4, 0x5B, 0x03, 0xEC, 0xB3, 0x5C, 0x04, 0xEB, 0xBA, 0x55, 0x0D, 0xE2, 0xBD, 0x52, 0x0A, 0xE5 }; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* RS code generator polynomial modulus (x^8 + x^6 + x^3 + x^2 + 1) */
#define RS_MOD 0x14D
/* the byte-replication constant used by the key schedule's h() inputs */
#define RHO 0x01010101L
|
|
|
|
|
|
|
@ -140,463 +141,479 @@ const uint8_t multEF[] = { 0x00, 0xEF, 0xB7, 0x58, 0x07, 0xE8, 0xB0, 0x5F, 0x0E, |
|
|
/* pack four bytes (r0 = most significant) into one big-endian-ordered u32 */
#define U8S_TO_U32(r0, r1, r2, r3) ((r0 << 24) ^ (r1 << 16) ^ (r2 << 8) ^ r3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Carry-less (GF(2)) multiplication of two polynomials whose coefficient
 * bits are held in u32's.  In this file it is only called with byte-sized
 * arguments, so the product fits comfortably in 32 bits; gfMod() reduces
 * it afterwards.
 */
uint32_t polyMult(uint32_t a, uint32_t b) {
    uint32_t t = 0;

    /* classic shift-and-xor multiply: for every set bit of a, xor in b */
    while (a) {
        if (a & 1) t ^= b;
        b <<= 1;
        a >>= 1;
    }
    return t;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Reduce the polynomial t modulo `modulus` in GF(256).
 * The modulus (degree 8) is pre-shifted so its top bit sits at bit 15;
 * eight conditional "subtractions" (xors) then clear bits 15..8 of t,
 * leaving the degree-<8 remainder.
 */
uint32_t gfMod(uint32_t t, uint32_t modulus) {
    int i;
    uint32_t tt;

    modulus <<= 7;
    for (i = 0; i < 8; i++) {
        tt = t ^ modulus;
        /* tt < t exactly when the xor cleared t's highest set bit in
         * range, i.e. when the reduction step was applicable */
        if (tt < t) t = tt;
        modulus >>= 1;
    }
    return t;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* multiply a and b as polynomials and reduce by modulus in GF(256) */
#define gfMult(a, b, modulus) gfMod(polyMult(a, b), modulus)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* return a u32 containing the result of multiplying the RS Code matrix by the sd matrix */ |
|
|
// return a u32 containing the result of multiplying the RS Code matrix by the sd matrix
|
|
|
uint32_t RSMatrixMultiply(uint8_t sd[8]) { |
|
|
uint32_t RSMatrixMultiply(uint8_t sd[8]) { |
|
|
|
|
|
|
|
|
int j, k; |
|
|
int j, k; |
|
|
uint8_t t; |
|
|
uint8_t t; |
|
|
uint8_t result[4]; |
|
|
uint8_t result[4]; |
|
|
|
|
|
|
|
|
for(j = 0; j < 4; j++) { |
|
|
for(j = 0; j < 4; j++) { |
|
|
t = 0; |
|
|
t = 0; |
|
|
for(k = 0; k < 8; k++) { |
|
|
for(k = 0; k < 8; k++) { |
|
|
t ^= gfMult(RS[j][k], sd[k], RS_MOD); |
|
|
t ^= gfMult(RS[j][k], sd[k], RS_MOD); |
|
|
|
|
|
} |
|
|
|
|
|
result[3-j] = t; |
|
|
} |
|
|
} |
|
|
result[3-j] = t; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return U8ARRAY_TO_U32(result); |
|
|
return U8ARRAY_TO_U32(result); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* the Zero-keyed h function (used by the key setup routine) */ |
|
|
// the Zero-keyed h function (used by the key setup routine)
|
|
|
uint32_t h(uint32_t X, uint32_t L[4], int k) { |
|
|
uint32_t h(uint32_t X, uint32_t L[4], int k) { |
|
|
|
|
|
|
|
|
uint8_t y0, y1, y2, y3; |
|
|
uint8_t y0, y1, y2, y3; |
|
|
uint8_t z0, z1, z2, z3; |
|
|
uint8_t z0, z1, z2, z3; |
|
|
|
|
|
|
|
|
y0 = b0(X); |
|
|
y0 = b0(X); |
|
|
y1 = b1(X); |
|
|
y1 = b1(X); |
|
|
y2 = b2(X); |
|
|
y2 = b2(X); |
|
|
y3 = b3(X); |
|
|
y3 = b3(X); |
|
|
|
|
|
|
|
|
switch(k) { |
|
|
switch(k) { |
|
|
case 4: |
|
|
case 4: |
|
|
y0 = Q1[y0] ^ b0(L[3]); |
|
|
y0 = Q1[y0] ^ b0(L[3]); |
|
|
y1 = Q0[y1] ^ b1(L[3]); |
|
|
y1 = Q0[y1] ^ b1(L[3]); |
|
|
y2 = Q0[y2] ^ b2(L[3]); |
|
|
y2 = Q0[y2] ^ b2(L[3]); |
|
|
y3 = Q1[y3] ^ b3(L[3]); |
|
|
y3 = Q1[y3] ^ b3(L[3]); |
|
|
case 3: |
|
|
case 3: |
|
|
y0 = Q1[y0] ^ b0(L[2]); |
|
|
y0 = Q1[y0] ^ b0(L[2]); |
|
|
y1 = Q1[y1] ^ b1(L[2]); |
|
|
y1 = Q1[y1] ^ b1(L[2]); |
|
|
y2 = Q0[y2] ^ b2(L[2]); |
|
|
y2 = Q0[y2] ^ b2(L[2]); |
|
|
y3 = Q0[y3] ^ b3(L[2]); |
|
|
y3 = Q0[y3] ^ b3(L[2]); |
|
|
case 2: |
|
|
case 2: |
|
|
y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ]; |
|
|
y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ]; |
|
|
y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ]; |
|
|
y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ]; |
|
|
y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ]; |
|
|
y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ]; |
|
|
y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ]; |
|
|
y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ]; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
/* inline the MDS matrix multiply */ |
|
|
// inline the MDS matrix multiply
|
|
|
z0 = multEF[y0] ^ y1 ^ multEF[y2] ^ mult5B[y3]; |
|
|
z0 = multEF[y0] ^ y1 ^ multEF[y2] ^ mult5B[y3]; |
|
|
z1 = multEF[y0] ^ mult5B[y1] ^ y2 ^ multEF[y3]; |
|
|
z1 = multEF[y0] ^ mult5B[y1] ^ y2 ^ multEF[y3]; |
|
|
z2 = mult5B[y0] ^ multEF[y1] ^ multEF[y2] ^ y3; |
|
|
z2 = mult5B[y0] ^ multEF[y1] ^ multEF[y2] ^ y3; |
|
|
z3 = y0 ^ multEF[y1] ^ mult5B[y2] ^ mult5B[y3]; |
|
|
z3 = y0 ^ multEF[y1] ^ mult5B[y2] ^ mult5B[y3]; |
|
|
|
|
|
|
|
|
return U8S_TO_U32(z0, z1, z2, z3); |
|
|
return U8S_TO_U32(z0, z1, z2, z3); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* given the Sbox keys, create the fully keyed QF */ |
|
|
// given the Sbox keys, create the fully keyed QF
|
|
|
void fullKey(uint32_t L[4], int k, uint32_t QF[4][256]) { |
|
|
void fullKey(uint32_t L[4], int k, uint32_t QF[4][256]) { |
|
|
|
|
|
|
|
|
uint8_t y0, y1, y2, y3; |
|
|
uint8_t y0, y1, y2, y3; |
|
|
int i; |
|
|
int i; |
|
|
|
|
|
|
|
|
/* for all input values to the Q permutations */ |
|
|
// for all input values to the Q permutations
|
|
|
for(i=0; i<256; i++) { |
|
|
for(i = 0; i < 256; i++) { |
|
|
/* run the Q permutations */ |
|
|
// run the Q permutations
|
|
|
y0 = i; y1=i; y2=i; y3=i; |
|
|
y0 = i; y1 = i; y2 = i; y3 = i; |
|
|
switch(k) { |
|
|
switch(k) { |
|
|
case 4: |
|
|
case 4: |
|
|
y0 = Q1[y0] ^ b0(L[3]); |
|
|
y0 = Q1[y0] ^ b0(L[3]); |
|
|
y1 = Q0[y1] ^ b1(L[3]); |
|
|
y1 = Q0[y1] ^ b1(L[3]); |
|
|
y2 = Q0[y2] ^ b2(L[3]); |
|
|
y2 = Q0[y2] ^ b2(L[3]); |
|
|
y3 = Q1[y3] ^ b3(L[3]); |
|
|
y3 = Q1[y3] ^ b3(L[3]); |
|
|
case 3: |
|
|
case 3: |
|
|
y0 = Q1[y0] ^ b0(L[2]); |
|
|
y0 = Q1[y0] ^ b0(L[2]); |
|
|
y1 = Q1[y1] ^ b1(L[2]); |
|
|
y1 = Q1[y1] ^ b1(L[2]); |
|
|
y2 = Q0[y2] ^ b2(L[2]); |
|
|
y2 = Q0[y2] ^ b2(L[2]); |
|
|
y3 = Q0[y3] ^ b3(L[2]); |
|
|
y3 = Q0[y3] ^ b3(L[2]); |
|
|
case 2: |
|
|
case 2: |
|
|
y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ]; |
|
|
y0 = Q1[ Q0 [ Q0[y0] ^ b0(L[1]) ] ^ b0(L[0]) ]; |
|
|
y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ]; |
|
|
y1 = Q0[ Q0 [ Q1[y1] ^ b1(L[1]) ] ^ b1(L[0]) ]; |
|
|
y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ]; |
|
|
y2 = Q1[ Q1 [ Q0[y2] ^ b2(L[1]) ] ^ b2(L[0]) ]; |
|
|
y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ]; |
|
|
y3 = Q0[ Q1 [ Q1[y3] ^ b3(L[1]) ] ^ b3(L[0]) ]; |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
// now do the partial MDS matrix multiplies
|
|
|
|
|
|
QF[0][i] = ((multEF[y0] << 24) |
|
|
|
|
|
| (multEF[y0] << 16) |
|
|
|
|
|
| (mult5B[y0] << 8) |
|
|
|
|
|
| y0); |
|
|
|
|
|
QF[1][i] = ((y1 << 24) |
|
|
|
|
|
| (mult5B[y1] << 16) |
|
|
|
|
|
| (multEF[y1] << 8) |
|
|
|
|
|
| multEF[y1]); |
|
|
|
|
|
QF[2][i] = ((multEF[y2] << 24) |
|
|
|
|
|
| (y2 << 16) |
|
|
|
|
|
| (multEF[y2] << 8) |
|
|
|
|
|
| mult5B[y2]); |
|
|
|
|
|
QF[3][i] = ((mult5B[y3] << 24) |
|
|
|
|
|
| (multEF[y3] << 16) |
|
|
|
|
|
| (y3 << 8) |
|
|
|
|
|
| mult5B[y3]); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
/* now do the partial MDS matrix multiplies */ |
|
|
|
|
|
QF[0][i] = ((multEF[y0] << 24) |
|
|
|
|
|
| (multEF[y0] << 16) |
|
|
|
|
|
| (mult5B[y0] << 8) |
|
|
|
|
|
| y0); |
|
|
|
|
|
QF[1][i] = ((y1 << 24) |
|
|
|
|
|
| (mult5B[y1] << 16) |
|
|
|
|
|
| (multEF[y1] << 8) |
|
|
|
|
|
| multEF[y1]); |
|
|
|
|
|
QF[2][i] = ((multEF[y2] << 24) |
|
|
|
|
|
| (y2 << 16) |
|
|
|
|
|
| (multEF[y2] << 8) |
|
|
|
|
|
| mult5B[y2]); |
|
|
|
|
|
QF[3][i] = ((mult5B[y3] << 24) |
|
|
|
|
|
| (multEF[y3] << 16) |
|
|
|
|
|
| (y3 << 8) |
|
|
|
|
|
| mult5B[y3]); |
|
|
|
|
|
} |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
// ----------------------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* fully keyed h (aka g) function: one lookup per byte lane of X */
#define fkh(X) (ctx->QF[0][b0(X)]^ctx->QF[1][b1(X)]^ctx->QF[2][b2(X)]^ctx->QF[3][b3(X)])
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------

/* one encryption round; expects T0, T1 and ctx to be in scope at the
 * expansion site.  NOTE: deliberately not wrapped in do{}while(0) so the
 * call sites below keep their existing statement structure. */
#define ENC_ROUND(R0, R1, R2, R3, round) \
    T0 = fkh(R0); \
    T1 = fkh(ROL(R1, 8)); \
    R2 = ROR(R2 ^ (T1 + T0 + ctx->K[2*round+8]), 1); \
    R3 = ROL(R3, 1) ^ (2*T1 + T0 + ctx->K[2*round+9]);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Encrypt one 16-byte block in place.
 * NOTE(review): PT is accessed through a uint32_t* cast, so it is assumed
 * to be 4-byte aligned -- confirm at call sites.
 */
void twofish_internal_encrypt(uint8_t PT[16], tf_context_t *ctx) {
    uint32_t R0, R1, R2, R3;
    uint32_t T0, T1;

    /* load/byteswap/whiten input */
    R3 = ctx->K[3] ^ le32toh(((uint32_t*)PT)[3]);
    R2 = ctx->K[2] ^ le32toh(((uint32_t*)PT)[2]);
    R1 = ctx->K[1] ^ le32toh(((uint32_t*)PT)[1]);
    R0 = ctx->K[0] ^ le32toh(((uint32_t*)PT)[0]);

    /* 16 rounds; the register swap between rounds is folded into the
     * alternating argument order */
    ENC_ROUND(R0, R1, R2, R3, 0);
    ENC_ROUND(R2, R3, R0, R1, 1);
    ENC_ROUND(R0, R1, R2, R3, 2);
    ENC_ROUND(R2, R3, R0, R1, 3);
    ENC_ROUND(R0, R1, R2, R3, 4);
    ENC_ROUND(R2, R3, R0, R1, 5);
    ENC_ROUND(R0, R1, R2, R3, 6);
    ENC_ROUND(R2, R3, R0, R1, 7);
    ENC_ROUND(R0, R1, R2, R3, 8);
    ENC_ROUND(R2, R3, R0, R1, 9);
    ENC_ROUND(R0, R1, R2, R3, 10);
    ENC_ROUND(R2, R3, R0, R1, 11);
    ENC_ROUND(R0, R1, R2, R3, 12);
    ENC_ROUND(R2, R3, R0, R1, 13);
    ENC_ROUND(R0, R1, R2, R3, 14);
    ENC_ROUND(R2, R3, R0, R1, 15);

    /* whiten/byteswap/store output (undoing the final swap: R2,R3 map to
     * words 0,1 and R0,R1 to words 2,3) */
    ((uint32_t*)PT)[3] = htole32(R1 ^ ctx->K[7]);
    ((uint32_t*)PT)[2] = htole32(R0 ^ ctx->K[6]);
    ((uint32_t*)PT)[1] = htole32(R3 ^ ctx->K[5]);
    ((uint32_t*)PT)[0] = htole32(R2 ^ ctx->K[4]);
}
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------

/* one decryption round; mirror of ENC_ROUND (rotations inverted).
 * Expects T0, T1 and ctx in scope at the expansion site. */
#define DEC_ROUND(R0, R1, R2, R3, round) \
    T0 = fkh(R0); \
    T1 = fkh(ROL(R1, 8)); \
    R2 = ROL(R2, 1) ^ (T0 + T1 + ctx->K[2*round+8]); \
    R3 = ROR(R3 ^ (T0 + 2*T1 + ctx->K[2*round+9]), 1);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Decrypt one 16-byte block from CT into PT (PT may alias CT).
 * Inverse of twofish_internal_encrypt: whitening keys and round order
 * are reversed.
 * NOTE(review): PT/CT are accessed through uint32_t* casts, so they are
 * assumed to be 4-byte aligned -- confirm at call sites.
 */
void twofish_internal_decrypt(uint8_t PT[16], const uint8_t CT[16], tf_context_t *ctx) {
    uint32_t T0, T1;
    uint32_t R0, R1, R2, R3;

    /* load/byteswap/whiten input */
    R3 = ctx->K[7] ^ le32toh(((uint32_t*)CT)[3]);
    R2 = ctx->K[6] ^ le32toh(((uint32_t*)CT)[2]);
    R1 = ctx->K[5] ^ le32toh(((uint32_t*)CT)[1]);
    R0 = ctx->K[4] ^ le32toh(((uint32_t*)CT)[0]);

    /* 16 rounds in reverse order */
    DEC_ROUND(R0, R1, R2, R3, 15);
    DEC_ROUND(R2, R3, R0, R1, 14);
    DEC_ROUND(R0, R1, R2, R3, 13);
    DEC_ROUND(R2, R3, R0, R1, 12);
    DEC_ROUND(R0, R1, R2, R3, 11);
    DEC_ROUND(R2, R3, R0, R1, 10);
    DEC_ROUND(R0, R1, R2, R3, 9);
    DEC_ROUND(R2, R3, R0, R1, 8);
    DEC_ROUND(R0, R1, R2, R3, 7);
    DEC_ROUND(R2, R3, R0, R1, 6);
    DEC_ROUND(R0, R1, R2, R3, 5);
    DEC_ROUND(R2, R3, R0, R1, 4);
    DEC_ROUND(R0, R1, R2, R3, 3);
    DEC_ROUND(R2, R3, R0, R1, 2);
    DEC_ROUND(R0, R1, R2, R3, 1);
    DEC_ROUND(R2, R3, R0, R1, 0);

    /* whiten/byteswap/store output */
    ((uint32_t*)PT)[3] = htole32(R1 ^ ctx->K[3]);
    ((uint32_t*)PT)[2] = htole32(R0 ^ ctx->K[2]);
    ((uint32_t*)PT)[1] = htole32(R3 ^ ctx->K[1]);
    ((uint32_t*)PT)[0] = htole32(R2 ^ ctx->K[0]);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
/* the key schedule routine */ |
|
|
|
|
|
|
|
|
// the key schedule routine
|
|
|
void keySched(const uint8_t M[], int N, uint32_t **S, uint32_t K[40], int *k) { |
|
|
void keySched(const uint8_t M[], int N, uint32_t **S, uint32_t K[40], int *k) { |
|
|
|
|
|
|
|
|
uint32_t Mo[4], Me[4]; |
|
|
uint32_t Mo[4], Me[4]; |
|
|
int i, j; |
|
|
int i, j; |
|
|
uint8_t vector[8]; |
|
|
uint8_t vector[8]; |
|
|
uint32_t A, B; |
|
|
uint32_t A, B; |
|
|
|
|
|
|
|
|
*k = (N + 63) / 64; |
|
|
*k = (N + 63) / 64; |
|
|
*S = (uint32_t*)malloc(sizeof(uint32_t) * (*k)); |
|
|
*S = (uint32_t*)malloc(sizeof(uint32_t) * (*k)); |
|
|
|
|
|
|
|
|
for(i = 0; i < *k; i++) { |
|
|
for(i = 0; i < *k; i++) { |
|
|
Me[i] = le32toh(((uint32_t*)M)[2*i]); |
|
|
Me[i] = le32toh(((uint32_t*)M)[2*i]); |
|
|
Mo[i] = le32toh(((uint32_t*)M)[2*i+1]); |
|
|
Mo[i] = le32toh(((uint32_t*)M)[2*i+1]); |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
for(i = 0; i < *k; i++) { |
|
|
for(i = 0; i < *k; i++) { |
|
|
for(j = 0; j < 4; j++) |
|
|
for(j = 0; j < 4; j++) |
|
|
vector[j] = _b(Me[i], j); |
|
|
vector[j] = _b(Me[i], j); |
|
|
for(j = 0; j < 4; j++) |
|
|
for(j = 0; j < 4; j++) |
|
|
vector[j+4] = _b(Mo[i], j); |
|
|
vector[j+4] = _b(Mo[i], j); |
|
|
(*S)[(*k)-i-1] = RSMatrixMultiply(vector); |
|
|
(*S)[(*k)-i-1] = RSMatrixMultiply(vector); |
|
|
} |
|
|
} |
|
|
for(i = 0; i < 20; i++) { |
|
|
|
|
|
A = h(2*i*RHO, Me, *k); |
|
|
for(i = 0; i < 20; i++) { |
|
|
B = ROL(h(2*i*RHO + RHO, Mo, *k), 8); |
|
|
A = h(2*i*RHO, Me, *k); |
|
|
K[2*i] = A+B; |
|
|
B = ROL(h(2*i*RHO + RHO, Mo, *k), 8); |
|
|
K[2*i+1] = ROL(A + 2*B, 9); |
|
|
K[2*i] = A+B; |
|
|
} |
|
|
K[2*i+1] = ROL(A + 2*B, 9); |
|
|
|
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* xor one 16-byte block into another, four u32's at a time.
 * NOTE(review): both pointers are accessed as uint32_t*, so the buffers
 * are assumed to be 4-byte aligned -- confirm at call sites. */
#define fix_xor(target, source) do { \
    *(uint32_t*)&(target)[0]  = *(uint32_t*)&(target)[0]  ^ *(uint32_t*)&(source)[0]; \
    *(uint32_t*)&(target)[4]  = *(uint32_t*)&(target)[4]  ^ *(uint32_t*)&(source)[4]; \
    *(uint32_t*)&(target)[8]  = *(uint32_t*)&(target)[8]  ^ *(uint32_t*)&(source)[8]; \
    *(uint32_t*)&(target)[12] = *(uint32_t*)&(target)[12] ^ *(uint32_t*)&(source)[12]; \
} while (0)
|
|
|
|
|
|
|
|
// -------------------------------------------------------------------------------------
|
|
|
// ----------------------------------------------------------------------------------------------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/** public API **/ |
|
|
// public API
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Decrypt one ECB block from in to out; returns the number of bytes
 * processed (always TF_BLOCK_SIZE). */
int tf_ecb_decrypt (unsigned char *out, const unsigned char *in, tf_context_t *ctx) {
    twofish_internal_decrypt(out, in, ctx);
    return TF_BLOCK_SIZE;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// not used
|
|
|
// not used
|
|
|
int tf_ecb_encrypt (unsigned char *out, const unsigned char *in, tf_context_t *ctx) { |
|
|
int tf_ecb_encrypt (unsigned char *out, const unsigned char *in, tf_context_t *ctx) { |
|
|
|
|
|
|
|
|
memcpy (out, in, TF_BLOCK_SIZE); |
|
|
memcpy(out, in, TF_BLOCK_SIZE); |
|
|
twofish_internal_encrypt(out, ctx); |
|
|
twofish_internal_encrypt(out, ctx); |
|
|
return TF_BLOCK_SIZE; |
|
|
|
|
|
|
|
|
return TF_BLOCK_SIZE; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int tf_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
int tf_cbc_encrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
const unsigned char *iv, tf_context_t *ctx) { |
|
|
const unsigned char *iv, tf_context_t *ctx) { |
|
|
|
|
|
|
|
|
uint8_t tmp[TF_BLOCK_SIZE]; |
|
|
uint8_t tmp[TF_BLOCK_SIZE]; |
|
|
size_t i; |
|
|
size_t i; |
|
|
size_t n; |
|
|
size_t n; |
|
|
|
|
|
|
|
|
|
|
|
memcpy(tmp, iv, TF_BLOCK_SIZE); |
|
|
|
|
|
|
|
|
memcpy(tmp, iv, TF_BLOCK_SIZE); |
|
|
n = in_len / TF_BLOCK_SIZE; |
|
|
|
|
|
for(i = 0; i < n; i++) { |
|
|
|
|
|
fix_xor(tmp, &in[i * TF_BLOCK_SIZE]); |
|
|
|
|
|
twofish_internal_encrypt(tmp, ctx); |
|
|
|
|
|
memcpy(&out[i * TF_BLOCK_SIZE], tmp, TF_BLOCK_SIZE); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
n = in_len / TF_BLOCK_SIZE; |
|
|
return n * TF_BLOCK_SIZE; |
|
|
for(i=0; i < n; i++) { |
|
|
|
|
|
fix_xor(tmp, &in[i * TF_BLOCK_SIZE]); |
|
|
|
|
|
twofish_internal_encrypt(tmp, ctx); |
|
|
|
|
|
memcpy(&out[i * TF_BLOCK_SIZE], tmp, TF_BLOCK_SIZE); |
|
|
|
|
|
} |
|
|
|
|
|
return n * TF_BLOCK_SIZE; |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
int tf_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
int tf_cbc_decrypt (unsigned char *out, const unsigned char *in, size_t in_len, |
|
|
const unsigned char *iv, tf_context_t *ctx) { |
|
|
const unsigned char *iv, tf_context_t *ctx) { |
|
|
|
|
|
|
|
|
int n; // number of blocks
|
|
|
int n; /* number of blocks */ |
|
|
int ret = (int)in_len & 15; // remainder
|
|
|
int ret = (int)in_len & 15; /* remainder */ |
|
|
|
|
|
|
|
|
uint8_t ivec[TF_BLOCK_SIZE]; // the ivec/old handling might be optimized if we
|
|
|
uint8_t ivec[TF_BLOCK_SIZE]; /* the ivec/old handling might be optimized if we */ |
|
|
uint8_t old[TF_BLOCK_SIZE]; // can be sure that in != out
|
|
|
uint8_t old[TF_BLOCK_SIZE]; /* can be sure that in != out */ |
|
|
|
|
|
|
|
|
memcpy(ivec, iv, TF_BLOCK_SIZE); |
|
|
memcpy(ivec, iv, TF_BLOCK_SIZE); |
|
|
|
|
|
|
|
|
for(n = in_len / TF_BLOCK_SIZE; n > 2; n -=3) { |
|
|
// 3 parallel rails of twofish decryption
|
|
|
|
|
|
for(n = in_len / TF_BLOCK_SIZE; n > 2; n -=3) { |
|
|
|
|
|
memcpy(old, in + 2 * TF_BLOCK_SIZE, TF_BLOCK_SIZE); |
|
|
|
|
|
|
|
|
|
|
|
uint32_t T0, T1; |
|
|
|
|
|
uint32_t Q0, Q1, Q2, Q3, R0, R1, R2, R3, S0, S1, S2, S3; |
|
|
|
|
|
|
|
|
|
|
|
// load/byteswap/whiten input/iv
|
|
|
|
|
|
Q3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[3]); |
|
|
|
|
|
Q2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[2]); |
|
|
|
|
|
Q1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[1]); |
|
|
|
|
|
Q0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[0]); |
|
|
|
|
|
|
|
|
|
|
|
R3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[7]); |
|
|
|
|
|
R2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[6]); |
|
|
|
|
|
R1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[5]); |
|
|
|
|
|
R0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[4]); |
|
|
|
|
|
|
|
|
|
|
|
S3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[11]); |
|
|
|
|
|
S2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[10]); |
|
|
|
|
|
S1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[9]); |
|
|
|
|
|
S0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[8]); |
|
|
|
|
|
|
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 15); DEC_ROUND(R0, R1, R2, R3, 15); DEC_ROUND(S0, S1, S2, S3, 15); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 14); DEC_ROUND(R2, R3, R0, R1, 14); DEC_ROUND(S2, S3, S0, S1, 14); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 13); DEC_ROUND(R0, R1, R2, R3, 13); DEC_ROUND(S0, S1, S2, S3, 13); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 12); DEC_ROUND(R2, R3, R0, R1, 12); DEC_ROUND(S2, S3, S0, S1, 12); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 11); DEC_ROUND(R0, R1, R2, R3, 11); DEC_ROUND(S0, S1, S2, S3, 11); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 10); DEC_ROUND(R2, R3, R0, R1, 10); DEC_ROUND(S2, S3, S0, S1, 10); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 9); DEC_ROUND(R0, R1, R2, R3, 9); DEC_ROUND(S0, S1, S2, S3, 9); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 8); DEC_ROUND(R2, R3, R0, R1, 8); DEC_ROUND(S2, S3, S0, S1, 8); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 7); DEC_ROUND(R0, R1, R2, R3, 7); DEC_ROUND(S0, S1, S2, S3, 7); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 6); DEC_ROUND(R2, R3, R0, R1, 6); DEC_ROUND(S2, S3, S0, S1, 6); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 5); DEC_ROUND(R0, R1, R2, R3, 5); DEC_ROUND(S0, S1, S2, S3, 5); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 4); DEC_ROUND(R2, R3, R0, R1, 4); DEC_ROUND(S2, S3, S0, S1, 4); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 3); DEC_ROUND(R0, R1, R2, R3, 3); DEC_ROUND(S0, S1, S2, S3, 3); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 2); DEC_ROUND(R2, R3, R0, R1, 2); DEC_ROUND(S2, S3, S0, S1, 2); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 1); DEC_ROUND(R0, R1, R2, R3, 1); DEC_ROUND(S0, S1, S2, S3, 1); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 0); DEC_ROUND(R2, R3, R0, R1, 0); DEC_ROUND(S2, S3, S0, S1, 0); |
|
|
|
|
|
|
|
|
|
|
|
// whiten/byteswap/store output/iv
|
|
|
|
|
|
((uint32_t*)out)[11] = htole32(S1 ^ ctx->K[3] ^ ((uint32_t*)in)[7]); |
|
|
|
|
|
((uint32_t*)out)[10] = htole32(S0 ^ ctx->K[2] ^ ((uint32_t*)in)[6]); |
|
|
|
|
|
((uint32_t*)out)[9] = htole32(S3 ^ ctx->K[1] ^ ((uint32_t*)in)[5]); |
|
|
|
|
|
((uint32_t*)out)[8] = htole32(S2 ^ ctx->K[0] ^ ((uint32_t*)in)[4]); |
|
|
|
|
|
|
|
|
|
|
|
((uint32_t*)out)[7] = htole32(R1 ^ ctx->K[3] ^ ((uint32_t*)in)[3]); |
|
|
|
|
|
((uint32_t*)out)[6] = htole32(R0 ^ ctx->K[2] ^ ((uint32_t*)in)[2]); |
|
|
|
|
|
((uint32_t*)out)[5] = htole32(R3 ^ ctx->K[1] ^ ((uint32_t*)in)[1]); |
|
|
|
|
|
((uint32_t*)out)[4] = htole32(R2 ^ ctx->K[0] ^ ((uint32_t*)in)[0]); |
|
|
|
|
|
|
|
|
|
|
|
((uint32_t*)out)[3] = htole32(Q1 ^ ctx->K[3] ^ ((uint32_t*)ivec)[3]); |
|
|
|
|
|
((uint32_t*)out)[2] = htole32(Q0 ^ ctx->K[2] ^ ((uint32_t*)ivec)[2]); |
|
|
|
|
|
((uint32_t*)out)[1] = htole32(Q3 ^ ctx->K[1] ^ ((uint32_t*)ivec)[1]); |
|
|
|
|
|
((uint32_t*)out)[0] = htole32(Q2 ^ ctx->K[0] ^ ((uint32_t*)ivec)[0]); |
|
|
|
|
|
|
|
|
|
|
|
in += 3 * TF_BLOCK_SIZE; out += 3 * TF_BLOCK_SIZE; |
|
|
|
|
|
|
|
|
|
|
|
memcpy(ivec, old, TF_BLOCK_SIZE); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
memcpy(old, in + 2 * TF_BLOCK_SIZE, TF_BLOCK_SIZE); |
|
|
// handle the two or less remaining block on a single rail
|
|
|
|
|
|
for(; n != 0; n--) { |
|
|
|
|
|
uint32_t T0, T1; |
|
|
|
|
|
uint32_t Q0, Q1, Q2, Q3; |
|
|
|
|
|
|
|
|
|
|
|
memcpy(old, in, TF_BLOCK_SIZE); |
|
|
|
|
|
|
|
|
|
|
|
// load/byteswap/whiten input
|
|
|
|
|
|
Q3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[3]); |
|
|
|
|
|
Q2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[2]); |
|
|
|
|
|
Q1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[1]); |
|
|
|
|
|
Q0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[0]); |
|
|
|
|
|
|
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 15); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 14); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 13); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 12); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 11); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 10); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 9); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 8); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 7); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 6); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 5); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 4); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 3); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 2); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 1); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 0); |
|
|
|
|
|
|
|
|
|
|
|
// whiten/byteswap/store output (XOR with iv)
|
|
|
|
|
|
((uint32_t*)out)[3] = htole32(Q1 ^ ctx->K[3] ^ ((uint32_t*)ivec)[3]); |
|
|
|
|
|
((uint32_t*)out)[2] = htole32(Q0 ^ ctx->K[2] ^ ((uint32_t*)ivec)[2]); |
|
|
|
|
|
((uint32_t*)out)[1] = htole32(Q3 ^ ctx->K[1] ^ ((uint32_t*)ivec)[1]); |
|
|
|
|
|
((uint32_t*)out)[0] = htole32(Q2 ^ ctx->K[0] ^ ((uint32_t*)ivec)[0]); |
|
|
|
|
|
|
|
|
|
|
|
in += TF_BLOCK_SIZE; out+= TF_BLOCK_SIZE; |
|
|
|
|
|
|
|
|
|
|
|
memcpy(ivec, old, TF_BLOCK_SIZE); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
uint32_t T0, T1; |
|
|
return n * TF_BLOCK_SIZE; |
|
|
uint32_t Q0, Q1, Q2, Q3, R0, R1, R2, R3, S0, S1, S2, S3; |
|
|
|
|
|
|
|
|
|
|
|
/* load/byteswap/whiten input/iv */ |
|
|
|
|
|
Q3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[3]); |
|
|
|
|
|
Q2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[2]); |
|
|
|
|
|
Q1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[1]); |
|
|
|
|
|
Q0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[0]); |
|
|
|
|
|
|
|
|
|
|
|
R3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[7]); |
|
|
|
|
|
R2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[6]); |
|
|
|
|
|
R1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[5]); |
|
|
|
|
|
R0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[4]); |
|
|
|
|
|
|
|
|
|
|
|
S3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[11]); |
|
|
|
|
|
S2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[10]); |
|
|
|
|
|
S1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[9]); |
|
|
|
|
|
S0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[8]); |
|
|
|
|
|
|
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 15); DEC_ROUND(R0, R1, R2, R3, 15); DEC_ROUND(S0, S1, S2, S3, 15); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 14); DEC_ROUND(R2, R3, R0, R1, 14); DEC_ROUND(S2, S3, S0, S1, 14); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 13); DEC_ROUND(R0, R1, R2, R3, 13); DEC_ROUND(S0, S1, S2, S3, 13); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 12); DEC_ROUND(R2, R3, R0, R1, 12); DEC_ROUND(S2, S3, S0, S1, 12); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 11); DEC_ROUND(R0, R1, R2, R3, 11); DEC_ROUND(S0, S1, S2, S3, 11); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 10); DEC_ROUND(R2, R3, R0, R1, 10); DEC_ROUND(S2, S3, S0, S1, 10); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 9); DEC_ROUND(R0, R1, R2, R3, 9); DEC_ROUND(S0, S1, S2, S3, 9); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 8); DEC_ROUND(R2, R3, R0, R1, 8); DEC_ROUND(S2, S3, S0, S1, 8); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 7); DEC_ROUND(R0, R1, R2, R3, 7); DEC_ROUND(S0, S1, S2, S3, 7); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 6); DEC_ROUND(R2, R3, R0, R1, 6); DEC_ROUND(S2, S3, S0, S1, 6); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 5); DEC_ROUND(R0, R1, R2, R3, 5); DEC_ROUND(S0, S1, S2, S3, 5); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 4); DEC_ROUND(R2, R3, R0, R1, 4); DEC_ROUND(S2, S3, S0, S1, 4); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 3); DEC_ROUND(R0, R1, R2, R3, 3); DEC_ROUND(S0, S1, S2, S3, 3); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 2); DEC_ROUND(R2, R3, R0, R1, 2); DEC_ROUND(S2, S3, S0, S1, 2); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 1); DEC_ROUND(R0, R1, R2, R3, 1); DEC_ROUND(S0, S1, S2, S3, 1); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 0); DEC_ROUND(R2, R3, R0, R1, 0); DEC_ROUND(S2, S3, S0, S1, 0); |
|
|
|
|
|
|
|
|
|
|
|
/* whiten/byteswap/store output (XOR with iv) */
|
|
|
|
|
|
|
|
|
|
|
((uint32_t*)out)[11] = htole32(S1 ^ ctx->K[3] ^ ((uint32_t*)in)[7]); |
|
|
|
|
|
((uint32_t*)out)[10] = htole32(S0 ^ ctx->K[2] ^ ((uint32_t*)in)[6]); |
|
|
|
|
|
((uint32_t*)out)[9] = htole32(S3 ^ ctx->K[1] ^ ((uint32_t*)in)[5]); |
|
|
|
|
|
((uint32_t*)out)[8] = htole32(S2 ^ ctx->K[0] ^ ((uint32_t*)in)[4]); |
|
|
|
|
|
|
|
|
|
|
|
((uint32_t*)out)[7] = htole32(R1 ^ ctx->K[3] ^ ((uint32_t*)in)[3]); |
|
|
|
|
|
((uint32_t*)out)[6] = htole32(R0 ^ ctx->K[2] ^ ((uint32_t*)in)[2]); |
|
|
|
|
|
((uint32_t*)out)[5] = htole32(R3 ^ ctx->K[1] ^ ((uint32_t*)in)[1]); |
|
|
|
|
|
((uint32_t*)out)[4] = htole32(R2 ^ ctx->K[0] ^ ((uint32_t*)in)[0]); |
|
|
|
|
|
|
|
|
|
|
|
((uint32_t*)out)[3] = htole32(Q1 ^ ctx->K[3] ^ ((uint32_t*)ivec)[3]); |
|
|
|
|
|
((uint32_t*)out)[2] = htole32(Q0 ^ ctx->K[2] ^ ((uint32_t*)ivec)[2]); |
|
|
|
|
|
((uint32_t*)out)[1] = htole32(Q3 ^ ctx->K[1] ^ ((uint32_t*)ivec)[1]); |
|
|
|
|
|
((uint32_t*)out)[0] = htole32(Q2 ^ ctx->K[0] ^ ((uint32_t*)ivec)[0]); |
|
|
|
|
|
|
|
|
|
|
|
in += 3 * TF_BLOCK_SIZE; out += 3 * TF_BLOCK_SIZE; |
|
|
|
|
|
|
|
|
|
|
|
memcpy(ivec, old, TF_BLOCK_SIZE); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
for(; n != 0; n--) { |
|
|
|
|
|
uint32_t T0, T1; |
|
|
|
|
|
uint32_t Q0, Q1, Q2, Q3; |
|
|
|
|
|
|
|
|
|
|
|
memcpy (old, in, TF_BLOCK_SIZE); |
|
|
|
|
|
|
|
|
|
|
|
/* load/byteswap/whiten input */ |
|
|
|
|
|
Q3 = ctx->K[7] ^ le32toh(((uint32_t*)in)[3]); |
|
|
|
|
|
Q2 = ctx->K[6] ^ le32toh(((uint32_t*)in)[2]); |
|
|
|
|
|
Q1 = ctx->K[5] ^ le32toh(((uint32_t*)in)[1]); |
|
|
|
|
|
Q0 = ctx->K[4] ^ le32toh(((uint32_t*)in)[0]); |
|
|
|
|
|
|
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 15); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 14); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 13); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 12); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 11); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 10); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 9); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 8); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 7); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 6); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 5); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 4); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 3); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 2); |
|
|
|
|
|
DEC_ROUND(Q0, Q1, Q2, Q3, 1); |
|
|
|
|
|
DEC_ROUND(Q2, Q3, Q0, Q1, 0); |
|
|
|
|
|
|
|
|
|
|
|
/* whiten/byteswap/store output (XOR with iv) */
|
|
|
|
|
((uint32_t*)out)[3] = htole32(Q1 ^ ctx->K[3] ^ ((uint32_t*)ivec)[3]); |
|
|
|
|
|
((uint32_t*)out)[2] = htole32(Q0 ^ ctx->K[2] ^ ((uint32_t*)ivec)[2]); |
|
|
|
|
|
((uint32_t*)out)[1] = htole32(Q3 ^ ctx->K[1] ^ ((uint32_t*)ivec)[1]); |
|
|
|
|
|
((uint32_t*)out)[0] = htole32(Q2 ^ ctx->K[0] ^ ((uint32_t*)ivec)[0]); |
|
|
|
|
|
|
|
|
|
|
|
in += TF_BLOCK_SIZE; out+= TF_BLOCK_SIZE; |
|
|
|
|
|
|
|
|
|
|
|
memcpy (ivec, old, TF_BLOCK_SIZE); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
return n * TF_BLOCK_SIZE; |
|
|
|
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
/**
 * By definition Twofish can only accept keys up to 256 bits.
 * We won't do any checking here and will assume the user already
 * knows about it. Twofish is undefined for keys larger than 256 bits.
 */
|
|
|
|
|
/**
 * Allocate and initialize a Twofish context from the given key.
 *
 * No key-size validation is performed; Twofish is undefined for keys
 * larger than 256 bits and the caller is expected to know this.
 *
 * @param key       raw key material
 * @param key_size  key length — NOTE(review): stored verbatim into ctx->N
 *                  and forwarded to keySched(); presumably bytes, confirm
 *                  against keySched()'s contract
 * @param ctx       receives the newly allocated context on success;
 *                  caller must release it with tf_deinit()
 * @return 0 on success, -1 if the context could not be allocated
 */
int tf_init (const unsigned char *key, size_t key_size, tf_context_t **ctx) {
    int k;
    uint32_t *S;

    *ctx = calloc(1, sizeof **ctx);
    if (!(*ctx)) {
        return -1;
    }

    (*ctx)->N = key_size;

    /* expand the user key into the round-key schedule and S-box key */
    keySched(key, key_size, &S, (*ctx)->K, &k);
    fullKey(S, k, (*ctx)->QF);
    free(S); /* allocated in keySched(...) */

    return 0;
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Release a Twofish context previously allocated by tf_init().
 *
 * The context holds expanded key material, so it is wiped before the
 * memory is returned to the allocator.
 *
 * @param ctx  context to free; may be NULL (no-op)
 * @return 0 always
 */
int tf_deinit (tf_context_t *ctx) {
    if (ctx) {
        /* Zeroize the key schedule before freeing.
         * NOTE(review): a plain memset immediately before free() can be
         * elided by the optimizer; prefer explicit_bzero()/memset_explicit()
         * where available. */
        memset(ctx, 0, sizeof *ctx);
        free(ctx);
    }
    return 0;
}
|
|