字符串哈希的哈希函数

  |  

摘要: 字符串哈希的哈希函数

【对算法,数学,计算机感兴趣的同学,欢迎关注我哈,阅读更多原创文章】
我的网站:潮汐朝夕的生活实验室
我的公众号:算法题刷刷
我的知乎:潮汐朝夕
我的github:FennelDumplings
我的leetcode:FennelDumplings


在文章 字符串哈希 中,我们探讨了字符串哈希,及其在字符串相关的经典问题中的应用。

字符串哈希是哈希中最常见的形式,在搜索引擎创建索引的时候大量的计算消耗在哈希生成和查找上面,因此选择一个又快又好的哈希函数对性能提升至关重要

本文记录几种字符串哈希中常用的哈希函数,这些哈希函数都是网上零散文章中摘录的,有一些在 大数据应用中的概率算法与数据结构 中有提到,有一些在 Redis 等框架中有用到。

这里只记录代码,留个印象,没有记录相关论文或相关框架中的文档,下面的参考链接中非常全面地记录了这些哈希函数的细节,值得学习。

参考链接: General Purpose Hash Function Algorithms

SDBM Hash

1
2
3
4
5
6
7
8
9
10
11
12
13
//SDBMHash
unsigned int SDBMHash(char *str)
{
unsigned int hash = 0;

while (*str)
{
// equivalent to: hash = 65599*hash + (*str++);
hash = (*str++) + (hash << 6) + (hash << 16) - hash;
}

return (hash & 0x7FFFFFFF);
}

RS Hash

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
// RS Hash Function
unsigned int RSHash(char *str)
{
unsigned int b = 378551;
unsigned int a = 63689;
unsigned int hash = 0;

while (*str)
{
hash = hash * a + (*str++);
a *= b;
}

return (hash & 0x7FFFFFFF);
}

JS Hash

1
2
3
4
5
6
7
8
9
10
11
12
// JS Hash Function
unsigned int JSHash(char *str)
{
unsigned int hash = 1315423911;

while (*str)
{
hash ^= ((hash << 5) + (*str++) + (hash >> 2));
}

return (hash & 0x7FFFFFFF);
}

P. J. Weinberger Hash

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
// P. J. Weinberger Hash Function
unsigned int PJWHash(char *str)
{
unsigned int BitsInUnignedInt = (unsigned int)(sizeof(unsigned int) * 8);
unsigned int ThreeQuarters = (unsigned int)((BitsInUnignedInt * 3) / 4);
unsigned int OneEighth = (unsigned int)(BitsInUnignedInt / 8);
unsigned int HighBits = (unsigned int)(0xFFFFFFFF) << (BitsInUnignedInt - OneEighth);
unsigned int hash = 0;
unsigned int test = 0;

while (*str)
{
hash = (hash << OneEighth) + (*str++);
if ((test = hash & HighBits) != 0)
{
hash = ((hash ^ (test >> ThreeQuarters)) & (~HighBits));
}
}

return (hash & 0x7FFFFFFF);
}

ELF Hash

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// ELF Hash Function
unsigned int ELFHash(char *str)
{
unsigned int hash = 0;
unsigned int x = 0;

while (*str)
{
hash = (hash << 4) + (*str++);
if ((x = hash & 0xF0000000L) != 0)
{
hash ^= (x >> 24);
hash &= ~x;
}
}

return (hash & 0x7FFFFFFF);
}

BKDR Hash

1
2
3
4
5
6
7
8
9
10
11
12
13
// BKDR Hash Function
unsigned int BKDRHash(char *str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
unsigned int hash = 0;

while (*str)
{
hash = hash * seed + (*str++);
}

return (hash & 0x7FFFFFFF);
}

DJB Hash

1
2
3
4
5
6
7
8
9
10
11
12
// DJB Hash Function
unsigned int DJBHash(char *str)
{
unsigned int hash = 5381;

while (*str)
{
hash += (hash << 5) + (*str++);
}

return (hash & 0x7FFFFFFF);
}

AP Hash

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// AP Hash Function
unsigned int APHash(char *str)
{
unsigned int hash = 0;
int i;

for (i=0; *str; i++)
{
if ((i & 1) == 0)
{
hash ^= ((hash << 7) ^ (*str++) ^ (hash >> 3));
}
else
{
hash ^= (~((hash << 11) ^ (*str++) ^ (hash >> 5)));
}
}

return (hash & 0x7FFFFFFF);
}

Share