From a64cfa450eeac5ee2d7fc91b8286cbf9c04af100 Mon Sep 17 00:00:00 2001 From: Logan007 Date: Mon, 29 Jun 2020 15:44:51 +0545 Subject: [PATCH] added 16-bit hashing --- include/pearson.h | 2 ++ src/pearson.c | 20 ++++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/include/pearson.h b/include/pearson.h index 6aaa604..2e32278 100644 --- a/include/pearson.h +++ b/include/pearson.h @@ -19,3 +19,5 @@ void pearson_hash_256 (uint8_t *out, const uint8_t *in, size_t len); void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len); + +uint16_t pearson_hash_16 (const uint8_t *in, size_t len); diff --git a/src/pearson.c b/src/pearson.c index 2e5313b..882e105 100644 --- a/src/pearson.c +++ b/src/pearson.c @@ -219,3 +219,23 @@ void pearson_hash_128 (uint8_t *out, const uint8_t *in, size_t len) { o = (uint64_t*)&out[8]; *o = lower_hash; } + + +// 16-bit hash: the return value has to be interpreted as uint16_t and +// follows machine-specific endianness in memory +uint16_t pearson_hash_16 (const uint8_t *in, size_t len) { + + uint16_t hash = 0; + uint16_t hash_mask = 0x0100; + + for (size_t i = 0; i < len; i++) { + // broadcast the character, xor into hash, make them different permutations + uint16_t c = (uint8_t)in[i]; + c |= c << 8; + hash ^= c ^ hash_mask; + // table lookup + hash = t[(uint8_t)hash] + (t[hash >> 8] << 8); + } + // output + return hash; +}