Hash Funcion improvement

Hi guys

below is an implementation of a polynomial hash function, besides the huge waste of space ( yes I could have used a vector instead of an array but in order for this code to work I need a fixed size )

so how can I improve this hashing function,also could you give me a few examples of more secure or efficient hash functions

thanks!

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#include <iostream>

using namespace std;

int compression(int hash,int size){

   if(size % 2 == 0)
    size++;

    return hash % size;
}

int hashCode(string str,int bucketSize){

   int g = 31;
   int hash = 0;

   for(int i = 0; i < str.size(); ++i)
      hash = hash * g + str.at(i);

   return compression(hash,bucketSize);
}

template <class T>
struct Pair{

   string key;
   T element;
};

template <class T>
class Node{

  public:
      Node* next;
      Pair<T> pair;

      Node(Node* next,Pair<T> pair): next(next),pair(pair){}
};

template <class T>
class List{

   public:
       Node<T>* head;

       List(){ head = NULL;}

       void addNode(T key,T element){

           Pair<T> pr;
           pr.key = key;
           pr.element = element;
           Node<T>* newNode = new Node<T>(head,pr);
           head = newNode;
       }

       T search(string key){

          Node<T>* current = head;

          while(current != NULL){

            if(current->pair.key == key)
                return current->pair.element;

            current = current->next;
          }
         return T();
       }

       bool isEmpty(){ return (head == NULL);}
};

template <class T>
class HashTable{

public:

    static const int size = 100;
    List<T> arr[size];

    HashTable(){ }

    void add(T key,T element){

       const int compressionMap = hashCode(key,size);
       arr[compressionMap].addNode(key,element);
    }

    T find(T key){

        const int compressionMap = hashCode(key,size);

        if(arr[compressionMap].isEmpty()){
            return NULL;
        }
        return arr[compressionMap].search(key);
    }
};


int main()
{

   string keys[] = {"one","two","three","four","five","six","seven"};
   string words[] = {"adam","bill","mark","sarah","steve","mike","cain"};
   HashTable<string> ht;

   for(int i = 0; i < 7; ++i)
        ht.add(keys[i],words[i]);

   string found = ht.find("three");
   cout << found << endl;
}
https://www.google.com/search?q=what+makes+a+good+hash+function

Add this as a debug method.
1
2
3
for ( int i = 0 ; i < size ; i++ ) {
  cout << "Slot " << i << " has " << arr[i].size() << " entries" << endl;
}


Add a lot more pairs, and check whether you are getting a uniform distribution.
Secure hashes are something else -- see SHA256 or SHA-1 for some examples (sha-1 is old and no longer secure but its a starting place). For a hash-table, this is what I use for most of mine; for most data I have had good luck with it, once in a while it clusters on something and I will crank out a different one. The best advice I can give there is to test it on real data to see if it does well on that data. If it works on a large sample of your real data, it will probably do well on similar but not yet seen data too. For a hash table, secure hashes work but may be too slow: you want the fastest thing you can get that throws the data into the table with a nice distribution (few collisions).

reshash maxout is your table size, rehash is one way to handle collisions. Its 99% a lookup table with a fairly efficient loop at the end.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
uint64_t hash(void* vp, uint32_t bytesize, uint64_t maxout = -1, uint32_t rehash=1)
{
	static const uint64_t rab[256] = {	
	3238593523956797946ULL,6477187047913595892ULL,8435907220062204430ULL,12954374095827191784ULL,11472609148414072338ULL,
	16871814440124408860ULL,14327483619285186022ULL,16515860097293205755ULL,14539261057490653441ULL,13607494391182877455ULL,
	10387063993012335349ULL,6265406319754774291ULL,8791864835633305321ULL,1085550678754862311ULL,2585467722461443357ULL,
	5247393906202824413ULL,7215812591205457703ULL,1239030555549527337ULL,4449591751341063379ULL,18092457712352332085ULL,
	15556728100436498639ULL,11742789833002527425ULL,10234164645493242683ULL,12530812639509548582ULL,9302088354573213660ULL,
	17583729671266610642ULL,15633189885995973672ULL,2171101357509724622ULL,3661574416647526452ULL,5170935444922886714ULL,
	7724537325157989312ULL,10494787812405648826ULL,13642865964979244096ULL,14431625182410915406ULL,16480541316673728436ULL,
	2478061111099054674ULL,1049933365183482792ULL,8899183502682126758ULL,6300970840149272668ULL,8399466921467862337ULL,
	6368420890995002555ULL,3275086581351513781ULL,108854135608684367ULL,14364169659802000041ULL,16980263386864569171ULL,
	11435870349096892765ULL,12845837170396948647ULL,15669858317114364775ULL,17692196227407282845ULL,9265331945857609875ULL,
	12422293323479818601ULL,7688114635962061967ULL,5062151678603773301ULL,3698085083440658299ULL,2279937883717887617ULL,
	4342202715019449244ULL,1203395666939462246ULL,7323148833295052904ULL,5282940851558637970ULL,10341870889845773428ULL,
	11778178981837571470ULL,15449074650315978624ULL,18057156506771531386ULL,11669866394404287583ULL,10160817855121008037ULL,
	17874829710049597355ULL,15339802717267265105ULL,1311848476550706103ULL,4523114428088083021ULL,5464845951130112067ULL,
	7432843562972398009ULL,4956122222198109348ULL,7509300761534850398ULL,2099866730366965584ULL,3591042414950500010ULL,
	17798367005364253516ULL,15848531969535615670ULL,12601941680298545336ULL,9372796311334617410ULL,16798933842935724674ULL,
	14253900473960229752ULL,12736841781990005110ULL,11255500115345754252ULL,6550173162703027562ULL,8509314479008689296ULL,
	217708271217368734ULL,3455596968422674276ULL,870833084869474937ULL,2370047569572014979ULL,6194214610827729293ULL,
	8721096401170761847ULL,13822387873690697105ULL,10602378625989962859ULL,16587157392570359397ULL,14609853536892473247ULL,
	3483332339477899749ULL,2064482512161650719ULL,7616958077116566033ULL,4991418462803860459ULL,9480190278288059917ULL,
	12637572737790640119ULL,15741190762473065977ULL,17762823925471730691ULL,15376229271924123934ULL,17983608511393921252ULL,
	10124303357207546602ULL,11561034798826117904ULL,7396170166881316598ULL,5356383260452470540ULL,4559875767435775234ULL,
	1420363961462201592ULL,8684405430038898488ULL,6085769495188764354ULL,2406791333878924492ULL,979366144819647798ULL,
	14646297666590105808ULL,16695918618875998506ULL,10565881703117275940ULL,13713538703073841886ULL,11362911691697612739ULL,
	12772455230081578553ULL,14146576876296094775ULL,16763373153642681805ULL,3347869283551649835ULL,182341662412566993ULL,
	8616954185191982047ULL,6585487012709290533ULL,13933329357911598997ULL,17126321439046432367ULL,11006435164953838689ULL,
	12992741788688209307ULL,8257930048646602877ULL,6803747195591438727ULL,3132703159877387145ULL,542775339377431155ULL,
	2623696953101412206ULL,619515277774763668ULL,9046228856176166042ULL,5871394916501263712ULL,10929691902260224134ULL,
	13501751302614184316ULL,14865687125944796018ULL,16338017159720129160ULL,9912244444396218696ULL,11925134239902742706ULL,
	15018601523069700796ULL,18202706530865158982ULL,4199733460733931168ULL,1637543290675756890ULL,7182084829901000020ULL,
	5717935174548446382ULL,7834929158557182387ULL,4632665972928804937ULL,3844057317981030983ULL,1849042541720329149ULL,
	16103865201353027163ULL,17549867708331900833ULL,9700748483321744815ULL,12280807109898935381ULL,5834933197202143791ULL,
	8937414855024798677ULL,655924238275353051ULL,2732422975565056033ULL,16374796089197559239ULL,14974255385173568573ULL,
	13465025131935292979ULL,10821211621719183305ULL,13100346325406055124ULL,11041713811386575662ULL,17018628958017378592ULL,
	13897997918303815898ULL,435416542434737468ULL,3097107305413864646ULL,6911193936845348552ULL,8293578696285179698ULL,
	1741666169738949874ULL,3808479038558283016ULL,4740095139144029958ULL,7870595381236532988ULL,12388429221655458586ULL,
	9736009554713699040ULL,17442192802341523694ULL,16068516186704462100ULL,18239503069743100937ULL,15127152172900050419ULL,
	11888425678624364541ULL,9803746554456753671ULL,5681455845848806369ULL,7073288438148047387ULL,1673934641775824917ULL,
	4308477092595991023ULL,6966664678955799498ULL,5503217582476919344ULL,4128965024323301438ULL,1566351579938693572ULL,
	15233916154233132066ULL,18417600011429070296ULL,9982836925607720918ULL,11996431537128302124ULL,9627165335515697969ULL,
	12207926510359495371ULL,15886756170769674437ULL,17332335396841578815ULL,3917464579278591193ULL,1922028658990515491ULL,
	8051932600676513581ULL,4850374241660872407ULL,2917466598601071895ULL,327962119137676525ULL,8187398044598779619ULL,
	6732512565967646489ULL,11221777246008269567ULL,13207379120439233285ULL,14004037317153847563ULL,17197450482186430705ULL,
	14792340333762633196ULL,16265093719173729302ULL,10712766520904941080ULL,13284123302255603682ULL,9119751534871550468ULL,
	5944212839312182270ULL,2840727922924403184ULL,836967320887912458ULL,17368810860077796976ULL,15995557527495450506ULL,
	12171538990377528708ULL,9518416773021940862ULL,4813582667757848984ULL,7943378085384837218ULL,1958732289639295596ULL,
	4025966300338256790ULL,1458733299300535947ULL,4093699022299389809ULL,5610888623004134783ULL,7002018658576923781ULL,
	12103802978479819107ULL,10018419036150929561ULL,18310175810188503703ULL,15198246066092718957ULL,13391477134206599341ULL,
	10748366240846565719ULL,16157651908532642649ULL,14756687855020634787ULL,729366649650267973ULL,2805444311502067391ULL,
	6051901489239909553ULL,9155087905094251851ULL,6695738567103299670ULL,8078825954266321324ULL,364683324825133986ULL,
	3025950744619954776ULL,17233908370383964094ULL,14112856248920397380ULL,13170974025418581066ULL,11113046258555286960ULL,
	4513414715797952618ULL
	};

	uint64_t fp = 0xc15d213aa4d7a795ULL;
	unsigned char* buf = (unsigned char*) vp;
	
  for (int i = 0; i < bytesize*rehash; i++)
    fp = (fp >> 8) ^ rab[(unsigned char)(fp ^ buf[i%bytesize])];
  return fp%maxout;
}


Your hash function exhibits undefined behavior for typical strings of any substantial length.

To avoid it, do all your math modulo sufficiently small M to avoid signed integer overflow. This is a rare case where the behavior of unsigned integers is actually desirable: operations on unsigned integer types are implicitly performed modulo 2^n.

UB is always a problem because the compiler may optimize your code assuming it never occurs.
Last edited on
mine or his?
His!
Topic archived. No new replies allowed.