527 lines
14 KiB
C++
527 lines
14 KiB
C++
// Copyright (C) 2011 Davis E. King (davis@dlib.net)
|
|
// License: Boost Software License See LICENSE.txt for the full license.
|
|
#ifndef DLIB_MURMUR_HAsH_3_Hh_
|
|
#define DLIB_MURMUR_HAsH_3_Hh_
|
|
|
|
#include "murmur_hash3_abstract.h"
|
|
#include "../uintn.h"
|
|
#include <utility>
|
|
#include <string.h>
|
|
|
|
namespace dlib
|
|
{
|
|
//-----------------------------------------------------------------------------
|
|
// The original MurmurHash3 code was written by Austin Appleby, and is placed
|
|
// in the public domain. The author hereby disclaims copyright to this source code.
|
|
// The code in this particular file was modified by Davis E. King. In
|
|
// particular, endian-swapping was added along with some other minor code
|
|
// changes like avoiding strict aliasing violations.
|
|
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// Platform-specific functions and macros
|
|
|
|
// Microsoft Visual Studio
|
|
|
|
|
|
#if ((defined(__GNUC__) && __GNUC__ >= 7) || defined(__clang__))
|
|
#define DLIB_FALLTHROUGH [[fallthrough]]
|
|
#else
|
|
#define DLIB_FALLTHROUGH
|
|
#endif
|
|
|
|
#if defined(_MSC_VER)
|
|
|
|
#define DLIB_FORCE_INLINE __forceinline
|
|
|
|
#include <stdlib.h>
|
|
|
|
#define DLIB_ROTL32(x,y) _rotl(x,y)
|
|
#define DLIB_ROTL64(x,y) _rotl64(x,y)
|
|
|
|
#define DLIB_BIG_CONSTANT(x) (x)
|
|
|
|
// Other compilers
|
|
|
|
#else // defined(_MSC_VER)
|
|
|
|
#define DLIB_FORCE_INLINE __attribute__((always_inline)) inline
|
|
|
|
inline uint32 murmur_rotl32 ( uint32 x, int8 r )
|
|
{
|
|
return (x << r) | (x >> (32 - r));
|
|
}
|
|
|
|
inline uint64 murmur_rotl64 ( uint64 x, int8 r )
|
|
{
|
|
return (x << r) | (x >> (64 - r));
|
|
}
|
|
|
|
#define DLIB_ROTL32(x,y) dlib::murmur_rotl32(x,y)
|
|
#define DLIB_ROTL64(x,y) dlib::murmur_rotl64(x,y)
|
|
|
|
#define DLIB_BIG_CONSTANT(x) (x##LLU)
|
|
|
|
#endif // !defined(_MSC_VER)
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// Block read - if your platform needs to do endian-swapping or can only
|
|
// handle aligned reads, do the conversion here
|
|
|
|
DLIB_FORCE_INLINE uint32 murmur_getblock ( const uint32 * p, int i )
|
|
{
|
|
// The reason we do a memcpy() here instead of simply returning p[i] is because
|
|
// doing it this way avoids violations of the strict aliasing rule when all these
|
|
// functions are inlined into the user's code.
|
|
uint32 temp;
|
|
memcpy(&temp, p+i, 4);
|
|
return temp;
|
|
}
|
|
|
|
DLIB_FORCE_INLINE uint32 murmur_getblock_byte_swap ( const uint32 * p, int i )
|
|
{
|
|
union
|
|
{
|
|
uint8 bytes[4];
|
|
uint32 val;
|
|
} temp;
|
|
|
|
const uint8* pp = reinterpret_cast<const uint8*>(p + i);
|
|
temp.bytes[0] = pp[3];
|
|
temp.bytes[1] = pp[2];
|
|
temp.bytes[2] = pp[1];
|
|
temp.bytes[3] = pp[0];
|
|
|
|
return temp.val;
|
|
}
|
|
|
|
DLIB_FORCE_INLINE uint64 murmur_getblock ( const uint64 * p, int i )
|
|
{
|
|
// The reason we do a memcpy() here instead of simply returning p[i] is because
|
|
// doing it this way avoids violations of the strict aliasing rule when all these
|
|
// functions are inlined into the user's code.
|
|
uint64 temp;
|
|
memcpy(&temp, p+i, 8);
|
|
return temp;
|
|
}
|
|
|
|
DLIB_FORCE_INLINE uint64 murmur_getblock_byte_swap ( const uint64 * p, int i )
|
|
{
|
|
union
|
|
{
|
|
uint8 bytes[8];
|
|
uint64 val;
|
|
} temp;
|
|
|
|
const uint8* pp = reinterpret_cast<const uint8*>(p + i);
|
|
temp.bytes[0] = pp[7];
|
|
temp.bytes[1] = pp[6];
|
|
temp.bytes[2] = pp[5];
|
|
temp.bytes[3] = pp[4];
|
|
temp.bytes[4] = pp[3];
|
|
temp.bytes[5] = pp[2];
|
|
temp.bytes[6] = pp[1];
|
|
temp.bytes[7] = pp[0];
|
|
|
|
return temp.val;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
// Finalization mix - force all bits of a hash block to avalanche
|
|
|
|
DLIB_FORCE_INLINE uint32 murmur_fmix ( uint32 h )
|
|
{
|
|
h ^= h >> 16;
|
|
h *= 0x85ebca6b;
|
|
h ^= h >> 13;
|
|
h *= 0xc2b2ae35;
|
|
h ^= h >> 16;
|
|
|
|
return h;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
DLIB_FORCE_INLINE uint64 murmur_fmix ( uint64 k )
|
|
{
|
|
k ^= k >> 33;
|
|
k *= DLIB_BIG_CONSTANT(0xff51afd7ed558ccd);
|
|
k ^= k >> 33;
|
|
k *= DLIB_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
|
|
k ^= k >> 33;
|
|
|
|
return k;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline uint32 murmur_hash3 (
|
|
const void * key,
|
|
const int len,
|
|
const uint32 seed = 0
|
|
)
|
|
{
|
|
const uint8 * data = (const uint8*)key;
|
|
const int nblocks = len / 4;
|
|
|
|
uint32 h1 = seed;
|
|
|
|
uint32 c1 = 0xcc9e2d51;
|
|
uint32 c2 = 0x1b873593;
|
|
|
|
//----------
|
|
// body
|
|
|
|
const uint32 * blocks = (const uint32 *)(data + nblocks*4);
|
|
|
|
bool is_little_endian = true;
|
|
uint32 endian_test = 1;
|
|
if (*reinterpret_cast<unsigned char*>(&endian_test) != 1)
|
|
is_little_endian = false;
|
|
|
|
|
|
if (is_little_endian)
|
|
{
|
|
for(int i = -nblocks; i; i++)
|
|
{
|
|
uint32 k1 = murmur_getblock(blocks,i);
|
|
|
|
k1 *= c1;
|
|
k1 = DLIB_ROTL32(k1,15);
|
|
k1 *= c2;
|
|
|
|
h1 ^= k1;
|
|
h1 = DLIB_ROTL32(h1,13);
|
|
h1 = h1*5+0xe6546b64;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(int i = -nblocks; i; i++)
|
|
{
|
|
uint32 k1 = murmur_getblock_byte_swap(blocks,i);
|
|
|
|
k1 *= c1;
|
|
k1 = DLIB_ROTL32(k1,15);
|
|
k1 *= c2;
|
|
|
|
h1 ^= k1;
|
|
h1 = DLIB_ROTL32(h1,13);
|
|
h1 = h1*5+0xe6546b64;
|
|
}
|
|
}
|
|
|
|
//----------
|
|
// tail
|
|
|
|
const uint8 * tail = (const uint8*)(data + nblocks*4);
|
|
|
|
uint32 k1 = 0;
|
|
|
|
switch(len & 3)
|
|
{
|
|
case 3: k1 ^= tail[2] << 16;
|
|
DLIB_FALLTHROUGH; // fall through
|
|
case 2: k1 ^= tail[1] << 8;
|
|
DLIB_FALLTHROUGH; // fall through
|
|
case 1: k1 ^= tail[0];
|
|
k1 *= c1; k1 = DLIB_ROTL32(k1,15); k1 *= c2; h1 ^= k1;
|
|
};
|
|
|
|
//----------
|
|
// finalization
|
|
|
|
h1 ^= len;
|
|
|
|
h1 = murmur_fmix(h1);
|
|
|
|
return h1;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline uint32 murmur_hash3_2 (
|
|
const uint32 v1,
|
|
const uint32 v2
|
|
)
|
|
{
|
|
uint32 h1 = v2;
|
|
|
|
uint32 c1 = 0xcc9e2d51;
|
|
uint32 c2 = 0x1b873593;
|
|
|
|
//----------
|
|
// body
|
|
|
|
|
|
uint32 k1 = v1;
|
|
|
|
k1 *= c1;
|
|
k1 = DLIB_ROTL32(k1,15);
|
|
k1 *= c2;
|
|
|
|
h1 ^= k1;
|
|
h1 = DLIB_ROTL32(h1,13);
|
|
h1 = h1*5+0xe6546b64;
|
|
|
|
|
|
//----------
|
|
// finalization
|
|
|
|
h1 ^= 4; // =^ by length in bytes
|
|
|
|
h1 = murmur_fmix(h1);
|
|
|
|
return h1;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline uint32 murmur_hash3_3 (
|
|
const uint32 v1,
|
|
const uint32 v2,
|
|
const uint32 v3
|
|
)
|
|
{
|
|
|
|
uint32 h1 = v3;
|
|
|
|
uint32 c1 = 0xcc9e2d51;
|
|
uint32 c2 = 0x1b873593;
|
|
|
|
//----------
|
|
// body
|
|
|
|
|
|
uint32 k1 = v1;
|
|
|
|
k1 *= c1;
|
|
k1 = DLIB_ROTL32(k1,15);
|
|
k1 *= c2;
|
|
|
|
h1 ^= k1;
|
|
h1 = DLIB_ROTL32(h1,13);
|
|
h1 = h1*5+0xe6546b64;
|
|
|
|
k1 = v2;
|
|
k1 *= c1;
|
|
k1 = DLIB_ROTL32(k1,15);
|
|
k1 *= c2;
|
|
|
|
h1 ^= k1;
|
|
h1 = DLIB_ROTL32(h1,13);
|
|
h1 = h1*5+0xe6546b64;
|
|
|
|
//----------
|
|
// finalization
|
|
|
|
h1 ^= 8; // =^ by length in bytes
|
|
|
|
h1 = murmur_fmix(h1);
|
|
|
|
return h1;
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline std::pair<uint64,uint64> murmur_hash3_128bit (
|
|
const void* key,
|
|
const int len,
|
|
const uint64 seed = 0
|
|
)
|
|
{
|
|
const uint8 * data = (const uint8*)key;
|
|
const int nblocks = len / 16;
|
|
|
|
uint64 h1 = seed;
|
|
uint64 h2 = seed;
|
|
|
|
uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
|
|
uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
|
|
|
|
//----------
|
|
// body
|
|
|
|
const uint64 * blocks = (const uint64 *)(data);
|
|
|
|
bool is_little_endian = true;
|
|
uint32 endian_test = 1;
|
|
if (*reinterpret_cast<unsigned char*>(&endian_test) != 1)
|
|
is_little_endian = false;
|
|
|
|
|
|
if (is_little_endian)
|
|
{
|
|
for(int i = 0; i < nblocks; i++)
|
|
{
|
|
uint64 k1 = murmur_getblock(blocks,i*2+0);
|
|
uint64 k2 = murmur_getblock(blocks,i*2+1);
|
|
|
|
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
|
|
|
h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
|
|
|
|
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
|
|
|
|
h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for(int i = 0; i < nblocks; i++)
|
|
{
|
|
uint64 k1 = murmur_getblock_byte_swap(blocks,i*2+0);
|
|
uint64 k2 = murmur_getblock_byte_swap(blocks,i*2+1);
|
|
|
|
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
|
|
|
h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
|
|
|
|
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
|
|
|
|
h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
|
|
}
|
|
}
|
|
|
|
//----------
|
|
// tail
|
|
|
|
const uint8 * tail = (const uint8*)(data + nblocks*16);
|
|
|
|
uint64 k1 = 0;
|
|
uint64 k2 = 0;
|
|
|
|
switch(len & 15)
|
|
{
|
|
case 15: k2 ^= uint64(tail[14]) << 48; DLIB_FALLTHROUGH; // fall through
|
|
case 14: k2 ^= uint64(tail[13]) << 40; DLIB_FALLTHROUGH; // fall through
|
|
case 13: k2 ^= uint64(tail[12]) << 32; DLIB_FALLTHROUGH; // fall through
|
|
case 12: k2 ^= uint64(tail[11]) << 24; DLIB_FALLTHROUGH; // fall through
|
|
case 11: k2 ^= uint64(tail[10]) << 16; DLIB_FALLTHROUGH; // fall through
|
|
case 10: k2 ^= uint64(tail[ 9]) << 8; DLIB_FALLTHROUGH; // fall through
|
|
case 9: k2 ^= uint64(tail[ 8]) << 0;
|
|
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2; DLIB_FALLTHROUGH; // fall through
|
|
|
|
case 8: k1 ^= uint64(tail[ 7]) << 56; DLIB_FALLTHROUGH; // fall through
|
|
case 7: k1 ^= uint64(tail[ 6]) << 48; DLIB_FALLTHROUGH; // fall through
|
|
case 6: k1 ^= uint64(tail[ 5]) << 40; DLIB_FALLTHROUGH; // fall through
|
|
case 5: k1 ^= uint64(tail[ 4]) << 32; DLIB_FALLTHROUGH; // fall through
|
|
case 4: k1 ^= uint64(tail[ 3]) << 24; DLIB_FALLTHROUGH; // fall through
|
|
case 3: k1 ^= uint64(tail[ 2]) << 16; DLIB_FALLTHROUGH; // fall through
|
|
case 2: k1 ^= uint64(tail[ 1]) << 8; DLIB_FALLTHROUGH; // fall through
|
|
case 1: k1 ^= uint64(tail[ 0]) << 0;
|
|
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
|
};
|
|
|
|
//----------
|
|
// finalization
|
|
|
|
h1 ^= len; h2 ^= len;
|
|
|
|
h1 += h2;
|
|
h2 += h1;
|
|
|
|
h1 = murmur_fmix(h1);
|
|
h2 = murmur_fmix(h2);
|
|
|
|
h1 += h2;
|
|
h2 += h1;
|
|
|
|
return std::make_pair(h1,h2);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline std::pair<uint64,uint64> murmur_hash3_128bit (
|
|
const uint32& v1,
|
|
const uint32& v2,
|
|
const uint32& v3,
|
|
const uint32& v4
|
|
)
|
|
{
|
|
uint64 h1 = 0;
|
|
uint64 h2 = 0;
|
|
|
|
const uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
|
|
const uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
|
|
|
|
//----------
|
|
// body
|
|
|
|
uint64 k1 = (static_cast<uint64>(v2)<<32)|v1;
|
|
uint64 k2 = (static_cast<uint64>(v4)<<32)|v3;
|
|
|
|
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2;
|
|
|
|
h1 = DLIB_ROTL64(k1,27); h1 = h1*5+0x52dce729;
|
|
|
|
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1;
|
|
|
|
h2 = DLIB_ROTL64(k2,31); h2 += h1; h2 = h2*5+0x38495ab5;
|
|
|
|
//----------
|
|
// finalization
|
|
|
|
h1 ^= 16; h2 ^= 16;
|
|
|
|
h1 += h2;
|
|
h2 += h1;
|
|
|
|
h1 = murmur_fmix(h1);
|
|
h2 = murmur_fmix(h2);
|
|
|
|
h1 += h2;
|
|
h2 += h1;
|
|
|
|
return std::make_pair(h1,h2);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
inline std::pair<uint64,uint64> murmur_hash3_128bit_3 (
|
|
uint64 k1,
|
|
uint64 k2,
|
|
uint64 k3
|
|
)
|
|
{
|
|
uint64 h1 = k3;
|
|
uint64 h2 = k3;
|
|
|
|
const uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
|
|
const uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
|
|
|
|
//----------
|
|
// body
|
|
|
|
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
|
|
|
|
h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
|
|
|
|
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
|
|
|
|
h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
|
|
|
|
//----------
|
|
// finalization
|
|
|
|
h1 ^= 16; h2 ^= 16;
|
|
|
|
h1 += h2;
|
|
h2 += h1;
|
|
|
|
h1 = murmur_fmix(h1);
|
|
h2 = murmur_fmix(h2);
|
|
|
|
h1 += h2;
|
|
h2 += h1;
|
|
|
|
return std::make_pair(h1,h2);
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------------------
|
|
|
|
}
|
|
|
|
#endif // DLIB_MURMUR_HAsH_3_Hh_
|
|
|