From 8d0c52102c160f4e6904d460c0a0451ea401515d Mon Sep 17 00:00:00 2001 From: Logan007 Date: Thu, 2 Jul 2020 12:49:25 +0545 Subject: [PATCH] added optional 16-bit look-up table --- include/pearson.h | 2 + src/edge_utils.c | 2 + src/pearson.c | 122 +++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 113 insertions(+), 13 deletions(-) diff --git a/include/pearson.h b/include/pearson.h index 2e32278..24e51fe 100644 --- a/include/pearson.h +++ b/include/pearson.h @@ -21,3 +21,5 @@ void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len); void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len); uint16_t pearson_hash_16 (const uint8_t *in, size_t len); + +void pearson_hash_init(); diff --git a/src/edge_utils.c b/src/edge_utils.c index 6158165..00b96e4 100644 --- a/src/edge_utils.c +++ b/src/edge_utils.c @@ -233,6 +233,8 @@ n2n_edge_t* edge_init(const tuntap_dev *dev, const n2n_edge_conf_t *conf, int *r eee->pending_peers = NULL; eee->sup_attempts = N2N_EDGE_SUP_ATTEMPTS; + pearson_hash_init(); + if(eee->conf.compression == N2N_COMPRESSION_ID_LZO) if(lzo_init() != LZO_E_OK) { traceEvent(TRACE_ERROR, "LZO compression error"); diff --git a/src/pearson.c b/src/pearson.c index d635662..e800185 100644 --- a/src/pearson.c +++ b/src/pearson.c @@ -17,7 +17,6 @@ */ // taken from https://github.com/Logan007/pearson - // This is free and unencumbered software released into the public domain. #include @@ -25,6 +24,10 @@ #include "pearson.h" +// compile with 'LOW_MEM_FOOTPRINT' defined to make use of 256 byte look-up table only +// otherwise, a 16-bit look-up table is used which allows considerably faster hashing +// however, it needs to be generated by calling pearson_hash_init() once upfront +// #define LOW_MEM_FOOTPRINT // table as in original paper "Fast Hashing of Variable-Length Text Strings" by Peter K. Pearson // as published in The Communications of the ACM Vol.33, No. 6 (June 1990), pp. 677-680. 
@@ -67,11 +70,16 @@ static const uint8_t t[256] ={ 0xd8, 0x83, 0x59, 0x15, 0x1c, 0x85, 0x25, 0x99, 0x95, 0x50, 0xaa, 0x44, 0x06, 0xa9, 0xea, 0x97 }; */ +#ifndef LOW_MEM_FOOTPRINT +static uint16_t t16[65536]; // 16-bit look-up table +#endif + #define ROR64(x,r) (((x)>>(r))|((x)<<(64-(r)))) void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len) { + size_t i; /* initial values - astonishingly, assembling using SHIFTs and ORs (in register) * works faster on well pipelined CPUs than loading the 64-bit value from memory. * however, there is one advantage to loading from memory: as we also store back to @@ -79,10 +87,8 @@ void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len) { uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - uint64_t *upper_hash_mask_ptr = (uint64_t*)&upper; - uint64_t *lower_hash_mask_ptr = (uint64_t*)&lower; - uint64_t upper_hash_mask = *upper_hash_mask_ptr; - uint64_t lower_hash_mask = *lower_hash_mask_ptr; + uint64_t upper_hash_mask = *(uint64_t*)&upper; + uint64_t lower_hash_mask = *(uint64_t*)&lower; uint64_t high_upper_hash_mask = upper_hash_mask + 0x1010101010101010; uint64_t high_lower_hash_mask = lower_hash_mask + 0x1010101010101010; @@ -90,7 +96,6 @@ void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len) { uint64_t lower_hash = 0; uint64_t high_upper_hash = 0; uint64_t high_lower_hash = 0; - size_t i; for (i = 0; i < len; i++) { // broadcast the character, xor into hash, make them different permutations @@ -104,8 +109,9 @@ void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len) { high_lower_hash ^= c ^ high_lower_hash_mask; // table lookup - uint8_t x; uint64_t h = 0; +#ifdef LOW_MEM_FOOTPRINT // 256 byte look-up table ---------- + uint8_t x; x = upper_hash; x = t[x]; upper_hash >>= 8; h |= x; h=ROR64(h,8); x = upper_hash; x = t[x]; upper_hash >>= 8; h |= x; h=ROR64(h,8); x = upper_hash; x = t[x]; 
upper_hash >>= 8; h |= x; h=ROR64(h,8); @@ -148,6 +154,35 @@ void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len) { x = high_lower_hash; x = t[x]; high_lower_hash >>= 8; h |= x; h=ROR64(h,8); x = high_lower_hash; x = t[x]; high_lower_hash >>= 8; h |= x; h=ROR64(h,8); high_lower_hash = h; +#else // 16-bit look-up table ------------------------------- + uint16_t x; + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h=ROR64(h,16); + upper_hash = h; + + h = 0; + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); + lower_hash = h; + + h = 0; + x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = high_upper_hash; x = t16[x]; high_upper_hash >>= 16; h |= x; h=ROR64(h,16); + high_upper_hash = h; + + h = 0; + x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = high_lower_hash; x = t16[x]; high_lower_hash >>= 16; h |= x; h=ROR64(h,16); + high_lower_hash = h; +#endif // LOW_MEM_FOOTPRINT ------ } // store output uint64_t *o; @@ -164,6 +199,7 @@ void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len) { void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len) { + size_t i; /* initial values - 
astonishingly, assembling using SHIFTs and ORs (in register) * works faster on well pipelined CPUs than loading the 64-bit value from memory. * however, there is one advantage to loading from memory: as we also store back to @@ -171,14 +207,11 @@ void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len) { uint8_t upper[8] = { 0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 }; uint8_t lower[8] = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 }; - uint64_t *upper_hash_mask_ptr = (uint64_t*)&upper; - uint64_t *lower_hash_mask_ptr = (uint64_t*)&lower; - uint64_t upper_hash_mask = *upper_hash_mask_ptr; - uint64_t lower_hash_mask = *lower_hash_mask_ptr; + uint64_t upper_hash_mask = *(uint64_t*)&upper; + uint64_t lower_hash_mask = *(uint64_t*)&lower; uint64_t upper_hash = 0; uint64_t lower_hash = 0; - size_t i; for (i = 0; i < len; i++) { // broadcast the character, xor into hash, make them different permutations @@ -189,8 +222,9 @@ void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len) { upper_hash ^= c ^ upper_hash_mask; lower_hash ^= c ^ lower_hash_mask; // table lookup - uint8_t x; uint64_t h = 0; +#ifdef LOW_MEM_FOOTPRINT // 256 byte look-up table ---------- + uint8_t x; x = upper_hash; x = t[x]; upper_hash >>= 8; h |= x; h=ROR64(h,8); x = upper_hash; x = t[x]; upper_hash >>= 8; h |= x; h=ROR64(h,8); x = upper_hash; x = t[x]; upper_hash >>= 8; h |= x; h=ROR64(h,8); @@ -210,7 +244,22 @@ void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len) { x = lower_hash; x = t[x]; lower_hash >>= 8; h |= x; h=ROR64(h,8); x = lower_hash; x = t[x]; lower_hash >>= 8; h |= x; h=ROR64(h,8); x = lower_hash; x = t[x]; lower_hash >>= 8; h |= x; h=ROR64(h,8); + lower_hash= h; +#else // 16-bit look-up table ------------------------------- + uint16_t x; + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h=ROR64(h,16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; 
h=ROR64(h,16); + x = upper_hash; x = t16[x]; upper_hash >>= 16; h |= x; h=ROR64(h,16); + upper_hash = h; + + h = 0; + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); + x = lower_hash; x = t16[x]; lower_hash >>= 16; h |= x; h=ROR64(h,16); lower_hash = h; +#endif // LOW_MEM_FOOTPRINT ------ } // store output uint64_t *o; @@ -220,6 +269,38 @@ void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len) { *o = lower_hash; } +/* --- for later use --- +// 32-bit hash: the return value has to be interpreted as uint32_t and +// follows machine-specific endianess in memory +uint32_t pearson_hash_32 (const uint8_t *in, size_t len) { + + size_t i; + uint32_t hash = 0; + uint32_t hash_mask = 0x03020100; + + for (i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + uint32_t c = (uint8_t)in[i]; + c |= c << 8; + c |= c << 16; + hash ^= c ^ hash_mask; + // table lookup +#ifdef LOW_MEM_FOOTPRINT + uint32_t h = 0; + uint8_t x; + x = hash; x = t[x]; hash >>= 8; h |= x; h=ROR32(h,8); + x = hash; x = t[x]; hash >>= 8; h |= x; h=ROR32(h,8); + x = hash; x = t[x]; hash >>= 8; h |= x; h=ROR32(h,8); + x = hash; x = t[x]; hash >>= 8; h |= x; h=ROR32(h,8); + hash = h; +#else + hash = (t16[hash >> 16] << 16) + t16[(uint16_t)hash]; +#endif + } + // output + return hash; +} --- pearson_hash_32 for later use --- */ + // 16-bit hash: the return value has to be interpreted as uint16_t and // follows machine-specific endianess in memory @@ -234,8 +315,23 @@ uint16_t pearson_hash_16 (const uint8_t *in, size_t len) { c |= c << 8; hash ^= c ^ hash_mask; // table lookup +#ifdef LOW_MEM_FOOTPRINT hash = t[(uint8_t)hash] + (t[hash >> 8] << 8); +#else + hash = t16[hash]; +#endif } // output return hash; } + + +void pearson_hash_init () { + +#ifndef LOW_MEM_FOOTPRINT + size_t i; + + for 
(i = 0; i < 65536; i++) + t16[i] = (t[i >> 8] << 8) + t[(uint8_t)i]; +#endif +}